Skip to content

Commit f5a032d

Browse files
committed
bike analysis scripts
1 parent 3eb5565 commit f5a032d

8 files changed

Lines changed: 1098 additions & 0 deletions

File tree

misc/bike_analysis/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
src/__pycache__/
Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
import pandas as pd
2+
import numpy as np
3+
import os
4+
# Show every column when a DataFrame is printed.
pd.set_option('display.max_columns', None)

# Work from the parent of the directory that contains this script so the
# relative 'data/...' paths below resolve. abspath() guards against a bare
# relative __file__ (e.g. 'script.py'), for which the two dirname() calls
# would otherwise yield '' and os.chdir('') raises FileNotFoundError.
os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# %%
# Directories (relative to the working directory set above)
data_dir = 'data'
raw_dir = f'{data_dir}/raw'              # input CSVs
interim_dir = f'{data_dir}/interim'      # outputs written by this script
processed_dir = f'{data_dir}/processed'
13+
14+
# %%
# Load data
# Raw tables are read in this order: household, person, unlinked trips,
# linked trips. The day table is currently not loaded:
# day = pd.read_csv(f'{raw_dir}/ex_day.csv')
hh, person, trips, ltrips = (
    pd.read_csv(f'{raw_dir}/{file_name}')
    for file_name in (
        'ex_hh.csv',
        'ex_person.csv',
        'ex_trip_unlinked.csv',
        'ex_trip_linked.csv',
    )
)

# %% Rename variables
# Shorten the linked-trip weight column name.
ltrips = ltrips.rename(columns={'linked_trip_weight': 'ltrip_weight'})
28+
29+
# Re-map values to labels
# Coded survey values -> readable labels, keyed by person-table column name.
person_mapping = {
    'gender': {
        1: 'Female', 2: 'Male', 3: 'Non-binary',
        995: 'Missing', 997: 'Other', 999: 'PNTA',
    },
    'age': {
        1: 'Under5', 2: '5-10', 3: '11-15', 4: '16-17', 5: '18-24',
        6: '25-34', 7: '35-44', 8: '45-54', 9: '55-64', 10: '65-74',
        11: '75-84', 12: '85plus',
    },
    'bike_freq': {
        1: '6-7days', 2: '5days', 3: '4days', 4: '3days', 5: '2days',
        6: '1day', 7: '1-3month', 8: '<1month',
        996: 'Never', 995: 'Missing',
    },
    'bike_attitude': {
        1: 'PhysicallyUnable', 2: 'DoesNotKnowHow', 3: 'NotInterested',
        4: 'WantBikeLess', 5: 'HappyWithCurrent', 6: 'WantBikeMore',
        995: 'Missing',
    },
}

# Re-map to broader categories
# Keys of the tables below must match the labels in person_mapping exactly;
# Series.map() silently yields NaN for any key that does not match.
bike_freq_map = {
    '6-7days': 'VeryFrequent',
    '5days': 'VeryFrequent',
    '4days': 'VeryFrequent',
    '3days': 'Occasional',
    '2days': 'Occasional',
    '1day': 'Occasional',
    '1-3month': 'Infrequent',
    '<1month': 'Infrequent',
    'Never': 'Never',
    'Missing': 'Missing',
}

bike_att_bin = {
    'PhysicallyUnable': 'NotInterestedCant',
    'DoesNotKnowHow': 'NotInterestedCant',
    'NotInterested': 'NotInterestedCant',
    'WantBikeLess': 'WantBikeLess',
    'HappyWithCurrent': 'HappyWithCurrent',
    'WantBikeMore': 'WantBikeMore',
    'Missing': 'Missing',
}

# income_broad code -> three-level income group.
income_bin = {
    1: 'Low',
    2: 'Low',
    3: 'Mid',
    4: 'Mid',
    5: 'High',
    6: 'High',
    995: 'Missing',
    999: 'Missing',
}

# income_broad code -> single numeric income value; missing codes become NaN.
income_num = {
    1: 25000,
    2: 37500,
    3: 62500,
    4: 87500,
    5: 150000,
    6: 200000,
    995: np.nan,
    999: np.nan,
}

# Age label -> broad age group.
# NOTE(review): '5-10' falls in 'Under10' while the next group is labelled
# '10-17'; the labels are kept unchanged because they end up in the exported
# CSVs and downstream scripts may match on them.
age_bin = {
    'Under5': 'Under10',
    '5-10': 'Under10',
    '11-15': '10-17',
    '16-17': '10-17',
    '18-24': '18-34',
    '25-34': '18-34',
    '35-44': '35-64',
    '45-54': '35-64',
    '55-64': '35-64',
    '65-74': '65+',
    '75-84': '65+',
    '85plus': '65+',
}

# Age label -> approximate numeric age for the group.
age_num = {
    'Under5': 2.5,
    '5-10': 8,  # was '5_10', which never matched the '5-10' label -> NaN
    '11-15': 13,
    '16-17': 16,
    '18-24': 21,
    '25-34': 30,
    '35-44': 40,
    '45-54': 50,
    '55-64': 60,
    '65-74': 70,
    '75-84': 80,
    '85plus': 85,
}
112+
113+
# Collapse the gender codes into three numeric groups:
#   1 -> codes 1, 3 and 997 (labelled 'Female & Others' below)
#   2 -> code 2 ('Male')
#   3 -> any other value ('Missing')
is_female_or_other = person['gender'].isin([1, 3, 997])
is_male = person['gender'] == 2
male_else_missing = np.where(is_male, 2, 3)
person['gender_num'] = np.where(is_female_or_other, 1, male_else_missing)

# Remaining person columns are plain lookups: (new column, source column,
# lookup table). They are applied in this order so the resulting column order
# of the person table is unchanged.
gender_groups = {1:'Female & Others', 2: 'Male', 3: 'Missing'}
person_lookups = (
    ('gender_lab', 'gender', person_mapping['gender']),
    ('gender_bin', 'gender_num', gender_groups),
    ('age_lab', 'age', person_mapping['age']),
    ('age_bin', 'age_lab', age_bin),
    ('age_num', 'age_lab', age_num),
    ('bike_freq_lab', 'bike_freq', person_mapping['bike_freq']),
    ('bike_freq_bin', 'bike_freq_lab', bike_freq_map),
    ('bike_att', 'bike_attitude', person_mapping['bike_attitude']),
    ('bike_att_bin', 'bike_att', bike_att_bin),
)
for new_col, source_col, lookup in person_lookups:
    person[new_col] = person[source_col].map(lookup)

# Household income: grouped label and numeric value from the same code.
for new_col, lookup in (('income_bin', income_bin), ('income_num', income_num)):
    hh[new_col] = hh['income_broad'].map(lookup)
132+
133+
# %%
# Define comfort variable
# All person columns whose name contains 'bike_comfort' are treated as
# comfort items.
comfort_cols = [col for col in person.columns if 'bike_comfort' in col]
likert_scale = {1:4, 2:3, 3:2, 4:1} # Reverse scoring for comfort
person[comfort_cols] = person[comfort_cols].replace(likert_scale)

# Row-wise mean of the comfort items, ignoring code 995 (995 is the
# 'Missing' code in the other person mappings; presumably the same here).
# np.nan is used instead of pd.NA so the columns stay numeric: replacing
# with pd.NA converts integer columns to object dtype before the mean.
# People with no valid item get 0, which acts as the "no score" sentinel.
person['avg_comfort'] = (
    person[comfort_cols]
    .replace(995, np.nan)
    .mean(axis=1, skipna=True)
    .fillna(0)
)
comfort_labels = ['Uncomfortable', 'Comfortable']

# Average of 3 or more -> 'Comfortable'; any other non-zero average ->
# 'Uncomfortable'; the 0 sentinel (no valid answers) -> missing.
person['comfort_bin'] = np.where(
    person['avg_comfort'] >= 3, 'Comfortable',
    np.where(person['avg_comfort'] != 0, 'Uncomfortable', pd.NA)
)
144+
145+
# %%
# Identify complete rmove households (for comfort classification - 04_classify.py)
print(f"Total households: {hh['hh_weight'].sum():,.2f}")

# Step 1: households whose diary platform is 'rmove'.
uses_rmove = hh['diary_platform'].eq('rmove')
hh_rmove = hh.loc[uses_rmove]
print(f"Total households with rmove: {hh_rmove['hh_weight'].sum():,.2f}")

# Step 2: of those, households with exactly 7 complete days.
has_full_week = hh_rmove['num_days_complete'].eq(7)
hh_rmove_complete = hh_rmove.loc[has_full_week]
print(f"Total number of households with 7 complete days: {hh_rmove_complete['hh_weight'].sum():,.2f}")
print(f"Total sample size of households with 7 complete days: {len(hh_rmove_complete):,}")

bike_complete_hh_ids = hh_rmove_complete['hh_id'].unique().tolist()

# Add flag to identify complete records
# 1 for people living in one of the households above, 0 for everyone else.
person['bike_complete_flag'] = (
    person['hh_id'].isin(bike_complete_hh_ids).astype('int64')
)
161+
162+
# %% Merge bike trips with person table
# Count bike trips
# Linked trips with linked_trip_mode == 11 are the ones treated as bike trips.
bike_trip_mask = (ltrips['linked_trip_mode'] == 11)
bike_trips = ltrips[bike_trip_mask].copy()
# One row per person who has at least one bike trip, with the trip count.
bike_counts = bike_trips.groupby('person_id').size().reset_index(name='bike_trips')

# Keep relevant cols
per_cols = ['hh_id', 'person_id', 'age_lab', 'age_bin', 'age_num',
            'gender_bin', 'num_days_complete', 'bike_complete_flag',
            'avg_comfort', 'comfort_bin',
            'student', 'person_weight']
# All remaining bike-related person columns. Columns already listed in
# per_cols (e.g. 'bike_complete_flag') are skipped: selecting
# per_cols + bike_cols_per would otherwise pick the same column twice and
# write a duplicated column to the exported CSVs.
bike_cols_per = [
    col for col in person.columns
    if 'bike' in col and col not in per_cols
]

# Merge bike trip info with person data
# Left join keeps every person; people without bike trips get a count of 0.
# validate='1:1' raises if person_id is duplicated on either side.
person_btrips = pd.merge(
    person[per_cols + bike_cols_per],
    bike_counts,
    on='person_id',
    how='left',
    validate='1:1'
).fillna({'bike_trips': 0})

# Merge with hh data
hh_cols = ['hh_id', 'num_bicycle_adult', 'num_bicycle_child', 'income_broad', 'income_bin', 'income_num']
person_btrips = pd.merge(
    person_btrips,
    hh[hh_cols],
    on='hh_id',
    how='left'
)

# Add dummy if person reported bike trip
person_btrips['recorded_btrip'] = np.where(person_btrips['bike_trips'] > 0, 1, 0)
195+
196+
# %% Merge bike trips with person info
# One row per bike trip, enriched first with the traveller's person columns
# and then with the household columns. hh_id is dropped from the trip table
# up front so that the copy coming from the person table is the one used for
# the household join (Add hh data).
btrips_person = (
    bike_trips
    .drop(columns=['hh_id'])
    .merge(person[per_cols + bike_cols_per], how='left', on='person_id')
    .merge(hh[hh_cols], how='left', on='hh_id')
)
211+
212+
# Add bike type to bike trip info
# The bike type is only recorded in the unlinked trip table, so pull every
# unlinked trip that belongs to one of the linked bike trips.
ltrip_bike_ids = bike_trips['linked_trip_id'].to_list()
belongs_to_bike_trip = trips['linked_trip_id'].isin(ltrip_bike_ids)
subset = trips.loc[belongs_to_bike_trip]
215+
216+
# Bike mode codes, ordered from lowest to highest priority: when a linked
# trip contains several of them, the one latest in this list wins.
bike_modes = [2, 3, 4, 5, 69, 70, 82, 103, 300]  # bike modes and hierarchy


def get_highest_bike_mode(modes):
    """Return the highest-priority bike mode found in *modes*.

    Priority is the position of the code in ``bike_modes`` (later entries
    rank higher), so the result follows the documented hierarchy even if the
    list is ever reordered, instead of depending on the numeric size of the
    codes.

    Args:
        modes: Iterable of mode codes for one linked trip. Values that are
            not in ``bike_modes`` (including NaN) are ignored.

    Returns:
        The winning mode code, or ``None`` when *modes* contains no bike
        mode (or is empty).
    """
    found_bikes = [m for m in modes if m in bike_modes]
    return max(found_bikes, key=bike_modes.index, default=None)
224+
225+
# Collect the mode_1 values of all unlinked trips per linked trip, then pick
# the highest-ranked bike mode from each list.
modes = (
    subset
    .groupby('linked_trip_id')['mode_1']
    .agg(list)
    .rename('mode_list')
    .reset_index()
)
modes['bike_mode'] = modes['mode_list'].apply(get_highest_bike_mode)

# Add bike type
# Left join: bike trips without a matching entry in `modes` keep a missing
# bike_mode.
btrips_person = btrips_person.merge(
    modes[['linked_trip_id', 'bike_mode']],
    how='left',
    on='linked_trip_id',
)

ebikes = [70, 82] # ebike from bikeshare, ebike in household
# Everything that is not an e-bike code (including a missing bike_mode) is
# labelled 'standard'.
uses_ebike = btrips_person['bike_mode'].isin(ebikes)
btrips_person['bike_type'] = np.where(uses_ebike, 'ebike', 'standard')
238+
239+
# %% Export
# Create the interim folder first: to_csv does not create missing parent
# directories and would raise OSError on a fresh checkout where only the raw
# data exists.
os.makedirs(interim_dir, exist_ok=True)

# Person-level table (one row per person, with bike trip counts).
person_btrips.to_csv(f'{interim_dir}/person_btrips.csv', index=False)
# Trip-level table (one row per linked bike trip, with person/hh attributes).
btrips_person.to_csv(f'{interim_dir}/btrips_person.csv', index=False)

0 commit comments

Comments
 (0)