-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutilities.py
More file actions
57 lines (41 loc) · 2.49 KB
/
utilities.py
File metadata and controls
57 lines (41 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
def create_csv(routeplans, stoplocations, driverpositions, estimated_durations,
               output_path='estimated_delivery_times.csv'):
    """Join route plans with stop status and estimated durations, then save as CSV.

    Args:
        routeplans: DataFrame with at least the columns ``stoplocationid``
            and ``routeplanid``; every row of it is kept (left joins).
        stoplocations: DataFrame providing ``stoplocationid`` and
            ``deliverystatus``; other columns are ignored.
        driverpositions: Not used by this function; kept so existing
            callers do not break.
        estimated_durations: DataFrame providing ``routeplanid`` and
            ``estimated_duration``; other columns are ignored.
        output_path: Destination of the CSV file. Defaults to the file name
            that used to be hard-coded.

    Returns:
        None. The merged frame is printed and written to ``output_path``
        without the index column.
    """
    # Only the join key and the single payload column are taken from each
    # lookup table so no unrelated columns leak into the output.
    status = stoplocations[['stoplocationid', 'deliverystatus']]
    durations = estimated_durations[['routeplanid', 'estimated_duration']]

    df = routeplans.merge(status, on='stoplocationid', how='left')
    df = df.merge(durations, on='routeplanid', how='left')
    print(df)
    df.to_csv(output_path, index=False)
def remove_tzinfo(date_string):
    """Return the part of ``date_string`` that precedes the first ``+``.

    Used to cut a trailing positive UTC offset off a timestamp string.
    Only the ``+`` sign is recognised: a string without ``+`` (for example
    one carrying a negative offset) is returned unchanged.
    """
    head, _sign, _offset = date_string.partition("+")
    return head
def remove_microseconds(date_string):
    """Return the part of ``date_string`` that precedes the first ``.``.

    Used to cut the fractional-seconds suffix off a timestamp string; a
    string without ``.`` is returned unchanged.
    """
    whole, _dot, _fraction = date_string.partition(".")
    return whole
def routeplans(path="routeplans_test.csv", sep='\t'):
    """Load the route plans table and drop routes that have no driver.

    Args:
        path: CSV file to read. Defaults to the previously hard-coded
            ``routeplans_test.csv``.
        sep: Field separator; the default is a tab.

    Returns:
        DataFrame read from ``path`` with the literal ``(null)`` treated as
        missing and every row lacking a ``driverid`` removed.
    """
    frame = pd.read_csv(path, sep=sep, header=0, na_values=["(null)"])
    print('%s read shape: %s' % (path, frame.shape))
    frame.dropna(subset=["driverid"], inplace=True)  # ignoring routes without driver
    # DataFrame.ftypes was removed in pandas 1.0; dtypes is available in
    # every pandas version and carries the same per-column type summary.
    print(frame.dtypes)
    return frame
def get_routeids(stoplocations):
    """Group the stop locations frame by route id and stop location id.

    Returns the pandas GroupBy object keyed on the
    ``(routeid, stoplocationid)`` pair; no aggregation is applied here.
    """
    group_keys = ['routeid', 'stoplocationid']
    return stoplocations.groupby(by=group_keys)
def stoplocations(path="stoplocations_test.csv", sep='\t'):
    """Load the stop locations table and normalise its position and timestamp.

    Args:
        path: CSV file to read. Defaults to the previously hard-coded
            ``stoplocations_test.csv``.
        sep: Field separator; the default is a tab.

    Returns:
        DataFrame read from ``path`` with the literal ``(null)`` treated as
        missing, rows lacking ``latitude`` or ``longitude`` removed, a
        ``position`` column holding ``(latitude, longitude)`` tuples, and
        ``deliverystatustimestamp`` converted to datetimes after the
        ``+`` offset and fractional seconds are stripped from the text.
    """
    frame = pd.read_csv(path, sep=sep, header=0, na_values=["(null)"])
    print('%s read shape: %s' % (path, frame.shape))
    frame.dropna(subset=["latitude", "longitude"], inplace=True)  # ignoring locations without latitude or longitude

    # zip builds the coordinate pairs without a row-wise apply and also
    # works when no rows are left after the dropna above.
    frame['position'] = list(zip(frame['latitude'], frame['longitude']))

    # "(null)" timestamps are read as NaN (a float), which the string
    # helpers cannot split; na_action='ignore' passes them through so they
    # end up as NaT instead of raising.
    cleaned = frame['deliverystatustimestamp'].map(
        lambda value: remove_microseconds(remove_tzinfo(value)),
        na_action='ignore',
    )
    frame['deliverystatustimestamp'] = pd.to_datetime(cleaned)

    # DataFrame.ftypes was removed in pandas 1.0; dtypes is available in
    # every pandas version.
    print(frame.dtypes)
    return frame
def get_drivers(driverpositions):
    """Group the driver positions frame by route id and driver id.

    Returns the pandas GroupBy object keyed on the ``(routeid, driverid)``
    pair; no aggregation is applied here.
    """
    group_keys = ['routeid', 'driverid']
    return driverpositions.groupby(by=group_keys)
def driverpositions(path="driverpositions_test.csv", sep='\t'):
    """Load the driver positions table, clean it and sort it chronologically.

    Args:
        path: CSV file to read. Defaults to the previously hard-coded
            ``driverpositions_test.csv``.
        sep: Field separator; the default is a tab. Pass ``","`` for
            comma-separated exports (e.g. Terje's file).

    Returns:
        DataFrame read from ``path`` with the literal ``(null)`` treated as
        missing, rows lacking ``latitude`` or ``longitude`` removed, a
        ``position`` column holding ``(latitude, longitude)`` tuples,
        ``logtime`` converted to datetimes after the ``+`` offset and
        fractional seconds are stripped from the text, missing ``speed``
        replaced by 0, and rows sorted by ``routeid``, ``driverid``,
        ``logtime``.
    """
    frame = pd.read_csv(path, sep=sep, header=0, na_values=["(null)"])
    print('%s read shape: %s' % (path, frame.shape))
    frame.dropna(subset=["latitude", "longitude"], inplace=True)  # ignoring positions without latitude or longitude

    # zip builds the coordinate pairs without a row-wise apply and also
    # works when no rows are left after the dropna above.
    frame['position'] = list(zip(frame['latitude'], frame['longitude']))

    # "(null)" log times are read as NaN (a float), which the string
    # helpers cannot split; na_action='ignore' passes them through so they
    # end up as NaT instead of raising.
    cleaned = frame['logtime'].map(
        lambda value: remove_microseconds(remove_tzinfo(value)),
        na_action='ignore',
    )
    frame['logtime'] = pd.to_datetime(cleaned)

    frame['speed'] = frame['speed'].fillna(value=0)
    frame = frame.sort_values(by=['routeid', 'driverid', 'logtime'])

    # DataFrame.ftypes was removed in pandas 1.0; dtypes is available in
    # every pandas version.
    print(frame.dtypes)
    return frame