USAPL/Competition.py at master · YuliaZamriy/USAPL · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 12 15:59:45 2017

@author: yzamriy

Goal:           Create Competition object
                Containing all details about the competition

Input:          Competition url reference, string

Main methods:

    1. return_dict: returns the competition dictionary
    2. get_lifter_ref: returns lifter url reference based on specified name
    3. get_comp_name: returns competition name
    4. build_filename: returns filename for the csv file containing the dictionary
    5. get_col_names: returns column names corresponding to keys in the dictionary
    6. get_comp_lifter_history: get full history of competitions for each lifter
    7. prior_history: remove records from the history dictionary that happened
                    after the competition
    8. build_hist_filename: returns filename for the csv file containing history dictionary
    9. return_hist_dict: returns the competition history dictionary
    10. get_hist_col_names: returns column names corresponding to keys in history dictionary

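Requirements:   CompetitionList.py to look up the competition name and date
                Lifter.py to get lifter history
                Soup.py, which is expected to provide getSoup (used to obtain
                Beautiful Soup objects) through the star import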
"""

import random
import datetime
from CompetitionList import CompetitionList
from Lifter import Lifter
from Soup import *

class Competition(object):
    '''
    Goal:    Represent a single competition: its results table and the
             competition history of every lifter listed in it
    Details: All data is scraped when the object is created, so creating a
             Competition performs network requests (one for the results page
             plus whatever CompetitionList and Lifter do internally)
    '''

    def __init__(self, reference):
        '''
        Goal:    Initializes Competition object
        Details: Competition object has following attributes:
                        self.reference: url reference for the competition
                        self.comp_dict: nested dictionary with data scraped from
                                        the competition specific page:
                                            Level 1 keys: text within 'a' tag
                                            Level 2 keys -> text within 'th' tags
                                                            (plus 'Link')
                                            Values -> text within 'td' tags
                        self.history_dict: nested dictionary returned by
                                        prior_history():
                                            Level 1 keys: lifter name
                                            Level 2 keys: keys of the lifter's
                                                          history dictionary
        '''
        self.reference = reference
        self.comp_dict = self.build_comp_dict()
        self.history_dict = self.prior_history()

    def build_col_names(self, soup=None):
        '''
        Goal: Get names for the columns of competition results table
              from Soup object
        Input: soup, optional already-parsed competition page; when omitted
               the page is fetched with getSoup(self.reference)
        Returns: Competition table column names, list
        '''
        if soup is None:
            soup = getSoup(self.reference)
        header_cells = soup.find("table", id="competition_view_results").find('thead').find_all('th')

        # First column in the scraped table doesn't have a name, but it
        # contains lifter weight class data
        col_names = ['Weightclass']
        for cell in header_cells:
            title = cell.get_text()
            # Each lift covers 3 columns (one for each attempt)
            # Raw scraped table has three columns but only one name
            # We create column names for each attempt column
            # Squat1, Squat2, Squat3, Bench press1, etc.
            if title in ('Squat', 'Bench press', 'Deadlift'):
                col_names.extend(title + attempt for attempt in '123')
            else:
                col_names.append(title)
        return col_names

    def build_comp_dict(self):
        '''
        Goal:       Builds nested dictionary with data scraped from the target
                    competition page
                    First, the data from the target page is processed with
                    Beautiful Soup and put into a soup object
                    Then we extract the table with id "competition_view_results"
                    The data itself is contained within 'tr' tags
                    Table headers between 'th' tags will be the keys
        Details:    Rows without an 'a' tag are skipped
                    If two rows carry the same lifter name, the later row
                    replaces the earlier one
        Returns: Competition results, nested dictionary
        '''
        comp_dict = {}

        # The page is fetched once and shared with build_col_names
        soup = getSoup(self.reference)
        rows = soup.find("table", id="competition_view_results").find('tbody').find_all('tr')
        col_names = self.build_col_names(soup)

        for row in rows:
            anchor = row.find('a')
            if anchor is None:
                continue
            details = {}
            for col_name, cell in zip(col_names, row.find_all('td')):
                details[col_name] = cell.get_text().strip()
                # Deliberately assigned inside the loop: this keeps 'Link'
                # right after the first column in the key order (the order
                # get_col_names exposes) and guarantees that a scraped
                # column called 'Link' can never override the url reference
                details['Link'] = anchor["href"]
            comp_dict[anchor.get_text()] = details

        return comp_dict

    def return_dict(self):
        '''
        Returns: the competition dictionary (self.comp_dict)
        '''
        return self.comp_dict

    def get_lifter_ref(self, lifter_name):
        '''
        Returns: lifter url reference based on specified name
        Raises:  KeyError if the name is not in the competition dictionary
        '''
        return self.comp_dict[lifter_name]['Link']

    def get_comp_name(self):
        '''
        Returns: competition name, looked up through CompetitionList
        '''
        return CompetitionList().get_comp_name(self.reference)

    def build_filename(self):
        '''
        Returns: string, filename for the csv file containing the dictionary
        '''
        return self.get_comp_name() + ".csv"

    def get_col_names(self):
        """
        Goal:       Return column names corresponding to keys in the dictionary
        Details:    Keys are taken from the first lifter in the dictionary
                    With an empty dictionary only ['Name'] is returned
        Returns:    list of key names
        """
        # 'Name' stands for the top level keys in the dictionary
        # that contain lifter's name
        col_names = ['Name']
        if self.comp_dict:
            first_lifter = next(iter(self.comp_dict))
            col_names.extend(self.comp_dict[first_lifter].keys())
        return col_names

    def get_comp_lifter_history(self):
        """
        Goal:       get full history of competitions for each lifter
        Details:    one Lifter object is created per lifter in the competition
        Returns:    nested dictionary, lifter name -> Lifter(...).return_dict()
        """
        return {
            lifter: Lifter(self.get_lifter_ref(lifter)).return_dict()
            for lifter in self.comp_dict
        }

    def prior_history(self):
        """
        Goal:       remove records from the lifter history dictionary
                    that happened after the competition
        Details:    a record is kept when its 'Date' is strictly earlier than
                    the competition date plus 7 days
                    NOTE(review): the reason for the 7 day margin is not
                    visible in this file - confirm before changing it
                    Dates are parsed with the '%m/%d/%Y' format
        Returns:    nested dictionary, every lifter is present even when
                    nothing of their history is kept
        """
        comp_date = CompetitionList().get_comp_date(self.reference)
        cutoff = datetime.datetime.strptime(comp_date, '%m/%d/%Y') + datetime.timedelta(days=7)
        lifters_history = self.get_comp_lifter_history()

        lifters_history_prior = {}
        for lifter, history in lifters_history.items():
            lifters_history_prior[lifter] = {
                comp: record
                for comp, record in history.items()
                if datetime.datetime.strptime(record['Date'], '%m/%d/%Y') < cutoff
            }
        return lifters_history_prior

    def build_hist_filename(self):
        '''
        Returns: string, filename for the csv file containing history dictionary
        '''
        return self.get_comp_name() + "_lifter_history.csv"

    def return_hist_dict(self):
        '''
        Returns: the competition history dictionary (self.history_dict)
        '''
        return self.history_dict

    def get_hist_col_names(self):
        """
        Goal:       Return column names corresponding to keys in history dictionary
        Details:    Uses the history already stored in self.history_dict
                    instead of scraping it again
                    When no lifter has any prior history only
                    ['Name', 'Link'] is returned
        Returns:    list of key names
        """
        # Top level in the dictionary is lifter's name
        # Next level is competition reference
        col_names = ['Name', 'Link']
        # To extract column names we need a non-empty dictionary
        # A lot of lifters compete for the first time, hence,
        # their history is empty
        # We walk through the lifters until one with history is found;
        # a plain loop always terminates, even if nobody has history
        for lifter, history in self.history_dict.items():
            if history:
                first_comp = next(iter(history))
                col_names.extend(history[first_comp].keys())
                return col_names
            print("first comp for", lifter)
        return col_names

#test = Competition('competitions-view?id=1622')