diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cfaad76 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pem diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..12398d7 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +cpython@3.11.3 diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 475f157..0000000 --- a/Pipfile +++ /dev/null @@ -1,12 +0,0 @@ -[[source]] -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -requests = "*" - -[dev-packages] - -[requires] -python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 4bc7195..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,57 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "d927fcadfe1cb7a3936766a19e9ff3dd822eca2b5d080bb479058a806e4fe234" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.8" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.python.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "certifi": { - "hashes": [ - "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee", - "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8" - ], - "version": "==2021.5.30" - }, - "chardet": { - "hashes": [ - "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", - "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" - ], - "version": "==4.0.0" - }, - "idna": { - "hashes": [ - "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", - "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" - ], - "version": "==2.10" - }, - "requests": { - "hashes": [ - "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", - "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" - ], - "index": "pypi", - "version": "==2.25.1" - }, - "urllib3": { - "hashes": [ - 
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", - "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" - ], - "version": "==1.26.6" - } - }, - "develop": {} -} diff --git a/README.md b/README.md index ef3cabc..e95b1e3 100644 --- a/README.md +++ b/README.md @@ -12,4 +12,4 @@ It will be built over time and I will update the below checklist according to th - [ ] Clean up returned data into a per-team basis in MongoDB styling - [ ] Write script to push gathered information to MongoDB - [ ] Setup CI workflow for data comparison :partying_face: - - [ ] Choose a provider \ No newline at end of file + - [ ] Choose a provider diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3dbd70c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,24 @@ +[project] +name = "plfixturesapi" +version = "0.1.0" +description = "Add your description here" +dependencies = [ + "requests>=2.31.0", + "black>=23.3.0", + "beautifulsoup4>=4.12.2", + "mechanicalsoup>=1.3.0", + "pymongo>=4.4.0", +] +readme = "README.md" +requires-python = ">= 3.8" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.rye] +managed = true +dev_dependencies = [] + +[tool.hatch.metadata] +allow-direct-references = true diff --git a/requirements-dev.lock b/requirements-dev.lock new file mode 100644 index 0000000..bdb3180 --- /dev/null +++ b/requirements-dev.lock @@ -0,0 +1,26 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false + +-e file:. 
+beautifulsoup4==4.12.2 +black==23.3.0 +certifi==2023.5.7 +charset-normalizer==3.2.0 +click==8.1.4 +dnspython==2.3.0 +idna==3.4 +lxml==4.9.3 +mechanicalsoup==1.3.0 +mypy-extensions==1.0.0 +packaging==23.1 +pathspec==0.11.1 +platformdirs==3.8.1 +pymongo==4.4.0 +requests==2.31.0 +soupsieve==2.4.1 +urllib3==2.0.3 diff --git a/requirements.lock b/requirements.lock new file mode 100644 index 0000000..bdb3180 --- /dev/null +++ b/requirements.lock @@ -0,0 +1,26 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false + +-e file:. +beautifulsoup4==4.12.2 +black==23.3.0 +certifi==2023.5.7 +charset-normalizer==3.2.0 +click==8.1.4 +dnspython==2.3.0 +idna==3.4 +lxml==4.9.3 +mechanicalsoup==1.3.0 +mypy-extensions==1.0.0 +packaging==23.1 +pathspec==0.11.1 +platformdirs==3.8.1 +pymongo==4.4.0 +requests==2.31.0 +soupsieve==2.4.1 +urllib3==2.0.3 diff --git a/src/plfixturesapi/__init__.py b/src/plfixturesapi/__init__.py new file mode 100644 index 0000000..4fe0a69 --- /dev/null +++ b/src/plfixturesapi/__init__.py @@ -0,0 +1,2 @@ +def hello(): + return "Hello from plfixturesapi!" 
diff --git a/utils/fullSchedule.py b/utils/fullSchedule.py index ec1da8f..08b5288 100644 --- a/utils/fullSchedule.py +++ b/utils/fullSchedule.py @@ -17,7 +17,7 @@ def final_score_calc(a_score, h_score): return "Final score was {0} - {1}".format(h_score, a_score) def main(team): - teams = {1:"Arsenal", 2:"Aston Villa", 3:"Brentford", 4:"Brighton and Hove Albion", 5:"Burnley", 6:"Chelsea", 7:"Crystal Palace", 8:"Everton", 9:"Leeds United", 10:"Leicester City", 11:"Liverpool", 12:"Manchester City", 13:"Manchester United", 14:"Newcastle United", 15:"Norwich City", 16:"Southampton", 17:"Tottenham Hotspur", 18:"Watford", 19:"West Ham United", 20:"Wolverhampton Wanderers"} + teams = {1:"Arsenal", 2:"Aston Villa", 3:"Bournemouth", 4:"Brentford", 5:"Brighton and Hove Albion", 6:"Burnley", 7:"Chelsea", 8:"Crystal Palace", 9:"Everton", 10:"Fulham", 11:"Liverpool", 12:"Luton Town", 13:"Manchester City", 14:"Manchester United", 15:"Newcastle United", 16:"Nottingham Forest", 17:"Sheffield United", 18:"Tottenham Hotspur", 19:"West Ham United", 20:"Wolverhampton Wanderers"} res = requests.get("https://fantasy.premierleague.com/api/fixtures/") result = res.json() @@ -46,4 +46,4 @@ def main(team): if __name__ == "__main__": team = "Arsenal" - main(team) \ No newline at end of file + main(team) diff --git a/utils/nextGame.py b/utils/nextGame.py index f41f5f1..99b5ad0 100644 --- a/utils/nextGame.py +++ b/utils/nextGame.py @@ -21,7 +21,7 @@ def nextMatch_calc(kickoff_time): return 1 def main(team): - teams = {1:"Arsenal", 2:"Aston Villa", 3:"Brentford", 4:"Brighton and Hove Albion", 5:"Burnley", 6:"Chelsea", 7:"Crystal Palace", 8:"Everton", 9:"Leeds United", 10:"Leicester City", 11:"Liverpool", 12:"Manchester City", 13:"Manchester United", 14:"Newcastle United", 15:"Norwich City", 16:"Southampton", 17:"Tottenham Hotspur", 18:"Watford", 19:"West Ham United", 20:"Wolverhampton Wanderers"} + teams = {1:"Arsenal", 2:"Aston Villa", 3:"Bournemouth", 4:"Brentford", 5:"Brighton and Hove 
Albion", 6:"Burnley", 7:"Chelsea", 8:"Crystal Palace", 9:"Everton", 10:"Fulham", 11:"Liverpool", 12:"Luton Town", 13:"Manchester City", 14:"Manchester United", 15:"Newcastle United", 16:"Nottingham Forest", 17:"Sheffield United", 18:"Tottenham Hotspur", 19:"West Ham United", 20:"Wolverhampton Wanderers"} res = requests.get("https://fantasy.premierleague.com/api/fixtures/") result = res.json() @@ -54,4 +54,4 @@ def main(team): if __name__ == "__main__": team = "Manchester United" - main(team) \ No newline at end of file + main(team) diff --git a/utils/output.txt b/utils/output.txt new file mode 100644 index 0000000..35941bf --- /dev/null +++ b/utils/output.txt @@ -0,0 +1,36 @@ +['Burnley', 'Manchester City'] +['Arsenal', 'Nottingham Forest'] +['Brentford', 'Tottenham Hotspur'] +['Manchester United', 'Wolverhampton Wanderers'] +['Nottingham Forest', 'Sheffield United'] +['Fulham', 'Brentford'] +['Aston Villa', 'Everton'] +['Crystal Palace', 'Arsenal'] +['Chelsea', 'Luton Town'] +['Bournemouth', 'Tottenham Hotspur'] +['Burnley', 'Aston Villa'] +['Luton Town', 'West Ham United'] +['Sheffield United', 'Everton'] +['Crystal Palace', 'Wolverhampton Wanderers'] +['Wolverhampton Wanderers', 'Liverpool'] +['Bournemouth', 'Chelsea'] +['Nottingham Forest', 'Burnley'] +['Chelsea', 'Aston Villa'] +['Arsenal', 'Tottenham Hotspur'] +['Aston Villa', 'Brighton and Hove Albion'] +['Nottingham Forest', 'Brentford'] +['Fulham', 'Chelsea'] +['Arsenal', 'Manchester City'] +['Aston Villa', 'West Ham United'] +['Arsenal', 'Sheffield United'] +['Brentford', 'West Ham United'] +['Arsenal', 'Burnley'] +['Brentford', 'Arsenal'] +['Arsenal', 'Wolverhampton Wanderers'] +['Aston Villa', 'Manchester City'] +['Manchester United', 'Chelsea'] +['Aston Villa', 'Arsenal'] +['Arsenal', 'Brighton and Hove Albion'] +['Aston Villa', 'Sheffield United'] +['Arsenal', 'West Ham United'] +['Aston Villa', 'Burnley'] \ No newline at end of file diff --git a/utils/siteScraper.py b/utils/siteScraper.py index 
31f3dff..a6bb3fb 100644 --- a/utils/siteScraper.py +++ b/utils/siteScraper.py @@ -1,31 +1,62 @@ -import requests, datetime -def main(gameweek): - teams = {1:"Arsenal", 2:"Aston Villa", 3:"Brentford", 4:"Brighton and Hove Albion", 5:"Burnley", 6:"Chelsea", 7:"Crystal Palace", 8:"Everton", 9:"Leeds United", 10:"Leicester City", 11:"Liverpool", 12:"Manchester City", 13:"Manchester United", 14:"Newcastle United", 15:"Norwich City", 16:"Southampton", 17:"Tottenham Hotspur", 18:"Watford", 19:"West Ham United", 20:"Wolverhampton Wanderers"} - res = requests.get("https://fantasy.premierleague.com/api/fixtures/") - result = res.json() - i = 0 - j = 0 - while i < len(result): - if result[i]["event"] == gameweek: - team_a = result[i]["team_a"] - team_h = result[i]["team_h"] - kickoffDateTime = result[i]["kickoff_time"] - kickoffDateTime = datetime.datetime.fromisoformat(kickoffDateTime[:-1]) - kickoffDate = "{0}-{1}-{2}".format(kickoffDateTime.day, kickoffDateTime.month, kickoffDateTime.year) - kickoffTime2 = kickoffDateTime.time() - if team_a in teams.keys(): - away = team_a - else: - away = "Not in dict" - if team_h in teams.keys(): - home = team_h +import re,requests, datetime +from bs4 import BeautifulSoup +from urllib.request import urlopen +#from pymongo import MongoClient +#from pymongo.server_api import ServerApi + +def scrapeContent(): + url = "https://www.skysports.com/premier-league-fixtures" # use https://www.skysports.com/manchester-united-fixtures for just United games + page = urlopen(url) + html_bytes = page.read() + html = html_bytes.decode("utf-8") + soup = BeautifulSoup(html, "html.parser") + return soup + +def gameBreakdown(html): + # dict object : {"date": list of fixtures} + # progression to team based db + # mongouri = "mongodb+srv://plfixturesdb.l3h7281.mongodb.net/?authSource=%24external&authMechanism=MONGODB-X509&retryWrites=true&w=majority" + # client = MongoClient(mongouri, + # tls=True, + # 
tlsCertificateKeyFile='../X509-cert-5032269924842152828.pem', + # server_api=ServerApi('1')) + # fixturesdb = client["fixturesdb"] + # allfixtures = fixturesdb["allfixtures"] + # mongodict = {} + # {x: x**2 for x in (2, 4, 6)} <- example of dictionary comprehension + gameDates = html.find_all('h4', class_='fixres__header2') # Gets array of fixture dates from h4 tags + gamelistList = [] + for date in gameDates: + readableDate = date.get_text() + for detail in date.find_next_sibling("div", "fixres__item"): + # print(detail) + nonReadableDetails = detail.get_text(strip=True) + # print(nonReadableDetails) + readableDetails = re.split(r"(00|\d\d:\w\w)", nonReadableDetails, maxsplit=2) # This regex seems to work atm. Will see how it holds up + # print(readableDetails) + if len(readableDetails) < 4: + continue else: - home = "Not in dict" - game = "{0} vs {1} on {2} at {3}".format(teams[home], teams[away], kickoffDate, kickoffTime2) - print(game) - j+=1 - i+=1 + readableFixtures = gameListToString(readableDetails, readableDate) + print(readableFixtures) + # if len(readableDetails)>1: + # print(readableDate, readableFixtures) + # print({readableDate: detail.get_text(strip=True).split("00") for detail in date.find_next_siblings("div", "fixres__item")}) + # print(mongodict) + +def gameListToString(gameList, date): + # example list ['Burnley', '00', '', '20:00', 'Manchester City'] + # remove element 1 and 2 + gameList.remove('00') + gameList.remove('') + string1 = "{}\n{} v {} @ {}".format(date, gameList[0],gameList[2],gameList[1]) + return string1 + +def outputGameTeams(games_metadata): + print(games_metadata) + if __name__ == "__main__": - gameweek = 2 - main(gameweek) \ No newline at end of file + skySportsPageHTML=scrapeContent() + games_metadata=gameBreakdown(skySportsPageHTML) + # outputGameTeams(games_metadata)