-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdriver.py
More file actions
22 lines (19 loc) · 1.38 KB
/
driver.py
File metadata and controls
22 lines (19 loc) · 1.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 15 20:16:52 2022
Driver script originally used for CSV sorting and streaming; it is now only used to sort and merge CSVs.
Note: this script is not run in the submission because the entire NYC dataset could not be submitted.
@author: raska
"""
from lib import streamSimulator
# --- Input locations ---------------------------------------------------------
# Hard-coded, machine-specific locations handed to sortCSV below.
tripPath = 'C:\\Users\\raska\\Cranfield data\\cloud computing\\trip_data'
farePath = 'C:\\Users\\raska\\Cranfield data\\cloud computing\\trip_fare'

# --- Output locations --------------------------------------------------------
# Destination of the CSV written at the end of this script.
dataLoc = 'sorted_data.csv'
# Only referenced by the disabled streamCSV call at the bottom of the script.
writeTo = 'StreamOut'

# --- Column name lists -------------------------------------------------------
# Only referenced by the disabled splitStreamCSV call below (second argument).
headernames = [
    "medallion",
    "hack_license",
    "pickup_datetime",
    "dropoff_datetime",
    "trip_time_in_secs",
    "trip_distance",
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
    "payment_type",
    "fare_amount",
    "surcharge",
    "mta_tax",
    "tip_amount",
    "tolls_amount",
    "total_amount",
]

# Not referenced anywhere in this script as shown.
dropped_cols = [
    "medallion",
    "hack_license",
    "trip_time_in_secs",
    "trip_distance",
    "payment_type",
    "fare_amount",
    "surcharge",
    "mta_tax",
    "tip_amount",
    "tolls_amount",
    "total_amount",
]

# Only referenced by the disabled splitStreamCSV call below (takeHeaders=...).
take = [
    "dropoff_datetime",
    "pickup_longitude",
    "pickup_latitude",
    "dropoff_longitude",
    "dropoff_latitude",
]

# Disabled streaming step, kept for reference:
# streamSimulator.splitStreamCSV(data, headernames, takeHeaders=take, batchSize = 3, timeInterval=3, fileWindow=100)

# Active step: run sortCSV over the fare and trip inputs with readRows=10000.
testDat = streamSimulator.sortCSV(
    farePath,
    tripPath,
    readRows=10000,
)

# Persist the result to dataLoc with index=False and header=False
# (presumably testDat is a pandas DataFrame -- TODO confirm against lib).
testDat.to_csv(
    dataLoc,
    index=False,
    header=False,
)

# Disabled streaming step, kept for reference:
# streamSimulator.streamCSV(dataLoc, writeTo, batchSize=100, timeInterval=2, fileWindow=None)