forked from udacity/pdsnd_github
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbikeshare.py
More file actions
456 lines (341 loc) · 13.8 KB
/
bikeshare.py
File metadata and controls
456 lines (341 loc) · 13.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
import time
import pandas as pd
import numpy as np
import csv
# Maps each supported city name (lower case) to the CSV file holding its trips.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Lower-case weekday names, Monday first; used to validate the day prompt.
DAYS_IN_VALS = [
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'sunday',
]

# Lower-case month names in calendar order, so position + 1 is the month number.
MONTHS_IN_VALS = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
]
def get_filters():
    """
    Prompt for the city and the time filters to analyze.

    The city question and the filter-type question are each repeated until a
    recognised answer is typed. Answers are stripped and lower-cased before
    they are checked. Month and day names are collected through get_month()
    and get_day(), and only for the filters the user asked for.

    Returns:
        (str) city - one of the keys of CITY_DATA
        (str) month - month name chosen by the user, or "all" to apply no month filter
        (str) day - day name chosen by the user, or "all" to apply no day filter
    """
    print("Hello! Let's explore some US bikeshare data!")

    # City: keep asking until the answer is a known CITY_DATA key.
    city = input('Would you like to see data for Chicago, New York City, or Washington?\n').strip().lower()
    while city not in CITY_DATA:
        city = input(
            'Wrong input, please enter the right city '
            '(Chicago, New York City, or Washington): \n'
        ).strip().lower()

    # Filter type: the retry prompt is the first prompt with a prefix.
    filter_question = (
        'Would you like to filter the data by month, day, both, or not at all? '
        'Type (none) for no time filter.\n'
    )
    filtering_key = input(filter_question).strip().lower()
    while filtering_key not in ('month', 'day', 'none', 'both'):
        filtering_key = input('wrong input! ' + filter_question).strip().lower()

    # The month is always asked before the day when both filters are wanted.
    month = get_month() if filtering_key in ('month', 'both') else 'all'
    day = get_day() if filtering_key in ('day', 'both') else 'all'

    print('-' * 40)
    return city, month, day
def get_month():
    """
    Get user input for the month and validate it.

    The answer is stripped and lower-cased, and the question is repeated until
    the answer is one of the months the prompt offers (January to June).
    Months after June are rejected so the validation agrees with the choices
    shown to the user.

    Args:
        None
    Returns:
        str: The validated month name, in lower case.
    Example:
        month = get_month()
    """
    # Only the first six entries of MONTHS_IN_VALS (January-June) are offered.
    offered_months = set(MONTHS_IN_VALS[:6])

    month = input('Which month - January, February, March, April, May, or June?\n').strip().lower()
    while month not in offered_months:
        month = input(
            'Wrong input, please try to enter the month name again - January, '
            'February, March, April, May, or June?\n'
        ).strip().lower()
    return month
def get_day():
    """
    Get user input for the day of the week and validate it.

    The answer is stripped and lower-cased, and the question is repeated until
    the answer is one of the names in DAYS_IN_VALS.

    Args:
        None
    Returns:
        str: The validated day name, in lower case.
    Example:
        day = get_day()
    """
    valid_days = set(DAYS_IN_VALS)

    day = input(
        'Which day - '
        'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday?\n'
    ).strip().lower()
    while day not in valid_days:
        day = input(
            'Wrong input, please try to enter the day name again - '
            'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday?\n'
        ).strip().lower()
    return day
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Two helper columns are added to the frame: 'month' (month number of the
    trip's Start Time) and 'day_of_week' (day name of the trip's Start Time).

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError: if city is not a key of CITY_DATA
        ValueError: if month is neither "all" nor a name listed in MONTHS_IN_VALS
    """
    # load data file into a dataframe
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime so the .dt accessors work
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()

    # filter by month if applicable
    if month != 'all':
        # MONTHS_IN_VALS is in calendar order, so position + 1 is the month
        # number. Using the shared list keeps every month name the program
        # validates mappable, instead of failing for names after June.
        month_number = MONTHS_IN_VALS.index(month.lower()) + 1
        df = df[df['month'] == month_number]

    # filter by day of week if applicable
    if day != 'all':
        # day_name() yields capitalised names such as 'Monday'
        df = df[df['day_of_week'] == day.title()]

    return df
def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    The frame is only read: the start hour is computed on the fly rather than
    stored as a new column on the caller's DataFrame.

    Args:
        df (pandas.DataFrame): Trip data with a datetime 'Start Time' column
            plus the 'month' (month number) and 'day_of_week' columns.
    Returns:
        None
    Prints:
        - Most popular month for traveling
        - Most popular day of the week for traveling
        - Most popular hour of the day to start traveling
        - A notice instead of the three lines above when df has no rows
        - Time taken to calculate the statistics
    Example:
        time_stats(time_data)
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so [0] would raise; report instead.
        print('No trip data to show!')
    else:
        # display the most common month (month numbers are 1-based)
        popular_month = MONTHS_IN_VALS[df['month'].mode()[0] - 1].title()
        print('What is the most popular month for travelling?\n', popular_month)

        # display the most common day of week
        popular_day = df['day_of_week'].mode()[0].title()
        print('what is the most popular day of week for travelling?\n', popular_day)

        # display the most common start hour
        popular_hour = df['Start Time'].dt.hour.mode()[0]
        print('what is the most popular hour of the day to start travelling?\n', popular_hour)

    execution_time = time.time() - start_time
    print(f"\nThis took {execution_time} seconds.")
    print('-' * 40)
def station_stats(df):
    """
    Displays statistics on the most popular stations and trips.

    The frame is only read: the start/end combination is built as a temporary
    Series rather than stored as a new column on the caller's DataFrame.

    Args:
        df (pandas.DataFrame): Trip data with 'Start Station' and
            'End Station' columns.
    Returns:
        None
    Prints:
        - Most commonly used start station
        - Most commonly used end station
        - Most frequent combination of start station and end station trip
        - A notice instead of the three lines above when df has no rows
        - Time taken to calculate the statistics
    Example:
        station_stats(station_data)
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so [0] would raise; report instead.
        print('No trip data to show!')
    else:
        # display most commonly used start station
        m_start_station = df['Start Station'].mode()[0]
        print('The most commonly used start station: ', m_start_station)

        # display most commonly used end station
        m_end_station = df['End Station'].mode()[0]
        print('The most commonly used end station: ', m_end_station)

        # display most frequent combination of start station and end station trip
        trips = df['Start Station'] + " To " + df['End Station']
        m_start_to_end_station = trips.mode()[0]
        print('The most frequent combination of start station and end station trip:\nFrom ', m_start_to_end_station)

    execution_time = time.time() - start_time
    print(f"\nThis took {execution_time} seconds.")
    print('-' * 40)
def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    Args:
        df (pandas.DataFrame): Trip data with a numeric 'Trip Duration'
            column. The values are passed to convert_seconds(), so they are
            treated as seconds.
    Returns:
        None
    Prints:
        - Total travel time
        - Mean travel time
        - A notice instead of the two lines above when there are no durations
        - Time taken to calculate the statistics
    Example:
        trip_duration_stats(trip_data)
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # Missing durations are ignored, as the pandas sum/mean reductions do.
    durations = df['Trip Duration'].dropna()

    if durations.empty:
        # The mean of nothing is NaN and int(NaN) raises, so report instead.
        print('No trip duration data to show!')
    else:
        # display total travel time
        total_travel_time = durations.sum()
        print('The total travel time: ', convert_seconds(int(total_travel_time)))

        # display mean travel time (fractional seconds are truncated)
        mean_travel_time = durations.mean()
        print('The mean travel time: ', convert_seconds(int(mean_travel_time)))

    execution_time = time.time() - start_time
    print(f"\nThis took {execution_time} seconds.")
    print('-' * 40)
def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Args:
        df (pandas.DataFrame): Trip data with a 'User Type' column and,
            optionally, 'Gender' and 'Birth Year' columns.
    Returns:
        None
    Prints:
        - Counts of user types
        - Counts of gender (if the column is available)
        - Earliest, most recent, and most common year of birth (if the column
          is available and holds at least one non-missing value)
        - Time taken to calculate the statistics
    Example:
        user_stats(bikeshare_data)
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()
    print("What is the breakdown of users?\n", user_types)
    print()

    # Display counts of gender
    if 'Gender' in df.columns:
        gender_count = df['Gender'].value_counts()
        print('What is the breakdown of gender?\n', gender_count)
        print()
    else:
        print('No gender data to show!')

    # Display earliest, most recent, and most common year of birth.
    # Missing values are dropped first: with no usable years, min()/max() are
    # NaN and mode() is empty, both of which would raise below.
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print('No Birth Year data to show!')
    else:
        earliest_y_o_b = int(birth_years.min())
        most_recent_y_o_b = int(birth_years.max())
        most_common_y_o_b = int(birth_years.mode()[0])
        print(
            'Year of birth breakdown:\nearliest: {}\n most recent: '
            '{}\nmost common: {}'.format(earliest_y_o_b, most_recent_y_o_b, most_common_y_o_b)
        )

    execution_time = time.time() - start_time
    print(f"\nThis took {execution_time} seconds.")
    print('-' * 40)
def read_csv_in_rows(filename):
    """
    Reads a CSV file and shows it to the user five rows at a time.

    Args:
        filename (str): The name of the CSV file to be read.
    Returns:
        None
    Description:
        Rows are read with csv.DictReader and collected into batches of 5.
        Each full batch is handed to 'print_rows', after which the user is
        asked whether to keep going; the question is repeated until the answer
        is 'yes' or 'no'. Answering 'no' stops reading immediately.
        When the file is read to its end, any remaining partial batch is
        printed and an end-of-data message is shown.
    Example:
        read_csv_in_rows('data.csv')
    """
    continue_prompt = "\nWould you like to view individual trip data? Type 'yes' or 'no'.\n"
    retry_prompt = "\nwrong input!!, Would you like to view individual trip data? Type 'yes' or 'no'.\n"

    # newline='' is what the csv module documentation asks for, so that
    # newlines embedded in quoted fields are interpreted correctly.
    with open(filename, 'r', newline='') as file:
        reader = csv.DictReader(file)
        rows = []
        for row in reader:
            rows.append(row)
            if len(rows) < 5:
                continue

            print_rows(rows)
            rows = []

            continue_printing = input(continue_prompt).strip().lower()
            while continue_printing not in ('yes', 'no'):
                continue_printing = input(retry_prompt).strip().lower()
            if continue_printing == 'no':
                break
        else:
            # Runs only when the loop was not left through 'break', i.e. the
            # whole file was consumed: flush the last partial batch, if any.
            if rows:
                print_rows(rows)
            print('\nThe end of row data!')
def print_rows(rows):
    """
    Prints the rows of data, one per line.

    Args:
        rows (list): The rows (dictionaries) to be printed.
    Returns:
        None
    Description:
        Each row is converted to its string form and written with 'print',
        in the order given. Nothing is printed for an empty list.
    """
    for rendered in map(str, rows):
        print(rendered)
def get_most_common_season(city):
    """
    Determines the most popular season for traveling in a specified city.

    The city's data is loaded without any month or day filter, the most
    frequent month number is taken, and that month is mapped to a season
    (December-February: Winter, March-May: Spring, June-August: Summer,
    otherwise Fall). The result is printed and also returned.

    Args:
        city (str): The name of the city for which to determine the most popular season.
    Returns:
        str: The most common season for traveling in the specified city, or
        None when the city's data has no rows.
    """
    start_time = time.time()
    df = load_data(city, 'all', 'all')

    season_by_month = {
        12: 'Winter', 1: 'Winter', 2: 'Winter',
        3: 'Spring', 4: 'Spring', 5: 'Spring',
        6: 'Summer', 7: 'Summer', 8: 'Summer',
    }

    if df.empty:
        # mode() of an empty column is empty, so [0] would raise; report instead.
        most_common_season = None
        print('No trip data to show!')
    else:
        most_common_month = int(df['month'].mode()[0])
        # Any month not listed above (September-November) counts as Fall.
        most_common_season = season_by_month.get(most_common_month, 'Fall')

        # Displaying the most common season
        print('What is the most popular season for travelling in {}?\nThe most common season is {}.'
              .format(city, most_common_season))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-' * 40)
    return most_common_season
def convert_seconds(seconds):
    """
    Breaks a number of seconds down into days, hours, minutes and seconds.

    Args:
        seconds (int): The total number of seconds to be converted.
    Returns:
        str: Text of the form "D days, H hours, M minutes, S seconds.".
    """
    # Peel off the largest unit first; each divmod leaves the remainder
    # for the next, smaller unit.
    days, leftover = divmod(seconds, 24 * 60 * 60)
    hours, leftover = divmod(leftover, 60 * 60)
    minutes, secs = divmod(leftover, 60)
    return f"{days} days, {hours} hours, {minutes} minutes, {secs} seconds."
def main():
    """
    Runs the interactive bikeshare session.

    Each round asks for the filters, loads the matching data, prints the
    time, station, trip-duration, user and season statistics, optionally
    pages through the raw rows, and then asks whether to start over. The
    loop ends as soon as the restart answer is 'no'.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # Some information from the data
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        get_most_common_season(city)

        # Ask the user if he wants to see the 5 lines row data or not.
        show_row_data = input('Do you want to see 5 lines of raw data? Enter "yes" or "no".\n').strip().lower()
        while show_row_data not in ('yes', 'no'):
            show_row_data = input(
                '\nWrong input!!, Do you want to see 5 lines of raw data? '
                'Enter "yes" or "no".\n'
            ).strip().lower()
        if show_row_data == 'yes':
            print('Printing 5 lines of row data:\n')
            # The rows come straight from the city's CSV file, so the month
            # and day filters chosen above do not apply to them.
            read_csv_in_rows(CITY_DATA[city])

        # Ask the user if he wants to restart the program again or not.
        restart = input('\nWould you like to restart? Enter yes or no.\n').strip().lower()
        while restart not in ('yes', 'no'):
            restart = input('\nWrong input!!, Would you like to restart? Enter yes or no.\n').strip().lower()
        # The answer is already lower-cased and validated at this point.
        if restart != 'yes':
            break
# Start the interactive session only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()