-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbattingScrape.py
More file actions
47 lines (40 loc) · 1.44 KB
/
battingScrape.py
File metadata and controls
47 lines (40 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import csv
import os

import requests
from bs4 import BeautifulSoup
_BATTING_URL = 'https://www.baseball-reference.com/leagues/MLB/2019-standard-batting.shtml'
_BATTING_TABLE_ID = "players_standard_batting"
_BATTING_OUTPUT_DIR = 'mlbBattingData'
# Row whose second cell carries this label marks the end of the player rows.
_LEAGUE_AVERAGE_LABEL = "LgAvg per 600 PA"
_BATTING_HEADER = [
    'Rk', 'Name', 'Age', 'Tm', 'Lg', 'G', 'PA', 'AB', 'R', 'H',
    '2B', '3B', 'HR', 'RBI', 'SB', 'CS', 'BB', 'SO', 'BA', 'OBP',
    'SLG', 'OPS', 'OPS+', 'TB', 'GDP', 'HBP', 'SH', 'SF', 'IBB',
    'Pos Summary',
]


def _cell_text(cell):
    """Return the text of a table cell, preferring the text of its first link."""
    link = cell.find('a')
    return link.text if link is not None else cell.text


def scrape_batting(day):
    """Download the 2019 MLB standard batting table and save it as a CSV file.

    The page is fetched, the ``players_standard_batting`` table is located,
    and every row whose first ``th`` cell is not the repeated ``Rk`` header is
    converted to a list of cell texts. Rows are written to
    ``mlbBattingData/<day>.csv`` under a fixed header, stopping at the
    "LgAvg per 600 PA" summary row.

    Args:
        day: String used as the output file name (without the ``.csv``
            extension).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the batting table is not present in the page.
    """
    response = requests.get(_BATTING_URL, timeout=30)
    response.raise_for_status()

    # Decode the bytes instead of taking their repr(): str(bytes) would leave
    # escape sequences (\n, \xc3\xa9, \') inside the scraped names.
    html = response.content.decode("utf-8", errors="replace")
    # The table is delivered inside an HTML comment; dropping the comment
    # delimiters makes it visible to the parser.
    html = html.replace("<!--", "").replace("-->", "")

    soup = BeautifulSoup(html, features="html.parser")
    table = soup.find("table", attrs={"id": _BATTING_TABLE_ID})
    if table is None:
        raise RuntimeError(
            "table '" + _BATTING_TABLE_ID + "' not found at " + _BATTING_URL
        )
    print("scraped batting!")

    output_rows = []
    for table_row in table.find_all('tr'):
        header_cells = table_row.find_all('th')
        # Skip rows without a leading th and the repeated column-header rows.
        if not header_cells or header_cells[0].text == "Rk":
            continue
        data_cells = table_row.find_all('td')
        output_rows.append(
            [_cell_text(cell) for cell in [*header_cells, *data_cells]]
        )

    # Build the path portably and make sure the target directory exists.
    os.makedirs(_BATTING_OUTPUT_DIR, exist_ok=True)
    output_path = os.path.join(_BATTING_OUTPUT_DIR, day + '.csv')
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(_BATTING_HEADER)
        for row in output_rows:
            # Everything from the league-average row onward is summary data.
            if len(row) > 1 and row[1] == _LEAGUE_AVERAGE_LABEL:
                break
            writer.writerow(row)
    print("batting data written to file")