-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStatcastPull.py
More file actions
86 lines (53 loc) · 2.82 KB
/
StatcastPull.py
File metadata and controls
86 lines (53 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Pulling data using the pybaseball Python package.
# Transforming data into individual CSV files by
# year and type of statistical category,
# then combining data by year and by statistical category,
# creating a combined yearly stat CSV file
# and creating a start-to-end category list (i.e. batting stats from 2018-2020).
# NOTE: was able to get through WSH of 2013 before the file got too big to process Statcast.
# Need to separate out Statcast and do each year first before combining.
from pybaseball import statcast
import pandas as pd
import os
def statcast_Data(currentSeason):
    """Pull one season of Statcast data, team by team, and save it as CSV.

    For each team abbreviation listed below, ``statcast`` is queried for the
    window ``<season>-04-01`` through ``<season>-10-03`` and the result is
    written to
    ``data/YearlyData/<season>/StatCast/<season>_Statcast_<TEAM>.csv``.
    Once every team has been pulled, the team frames are combined into
    ``data/YearlyData/<season>/<season>_All_StatCast.csv``.

    Seasons that are not later than 2007 are skipped: nothing is fetched and
    nothing is written.

    Args:
        currentSeason: Season year to pull, e.g. ``2020``.

    Returns:
        None. The output is the CSV files plus progress lines on stdout.
    """
    # StatCast data started in 2008; some features were not added until 2015.
    if not currentSeason > 2007:
        return

    # Date window to pull for the season.
    start_day = "%s-04-01" % currentSeason
    end_day = "%s-10-03" % currentSeason

    # Folder and file-name prefix for the per-team files.
    team_folder = "data/YearlyData/%s/StatCast/" % currentSeason
    team_file_prefix = "%s_Statcast_" % currentSeason
    print(team_folder)

    # makedirs (unlike mkdir) also creates the missing parent folders, which
    # include the season folder the master file is written to; exist_ok makes
    # a re-run over an existing tree a no-op.
    os.makedirs(team_folder, exist_ok=True)

    # Master file holding every team's rows for the season.
    master_folder = "data/YearlyData/%s/" % currentSeason
    master_filename = "%s_All_StatCast.csv" % currentSeason
    master_path = master_folder + master_filename

    team_list = (
        'ARI', 'ATL', 'BAL', 'BOS', 'CHC', 'CWS', 'CIN', 'CLE', 'COL', 'DET',
        'HOU', 'KC', 'LAA', 'LAD', 'MIA', 'MIL', 'MIN', 'NYM', 'NYY', 'OAK',
        'PHI', 'PIT', 'SD', 'SEA', 'SF', 'STL', 'TB', 'TEX', 'TOR', 'WSH',
    )

    # Each team's file is written as soon as its data arrives, so progress
    # survives a failure part-way through the list. The frames are kept in
    # memory and combined once at the end instead of re-reading and rewriting
    # the growing master CSV for every team: that round trip was quadratic in
    # I/O and added a stray "Unnamed: 0" column on every pass.
    team_frames = []
    for team in team_list:
        team_data = statcast(start_day, end_day, team=team)

        team_filename = team_file_prefix + ("%s.csv" % team)
        team_data.to_csv(team_folder + team_filename)

        team_frames.append(team_data)
        print(str(currentSeason) + " Statcast Data for : " + str(team) + " : Successful")

    # Stack the last team first: this keeps the row order the master file has
    # always had (each new team was placed ahead of the earlier ones).
    combined = pd.concat(team_frames[::-1], ignore_index=True)
    combined.to_csv(master_path)
    print(str(currentSeason) + " All Statcast Data for : Successful")
if __name__ == "__main__":
    # Run the pull only when this file is executed as a script, so importing
    # the module to reuse statcast_Data does not start a full-season download.
    statcast_Data(2020)
    print("Successful")