From e1a2c8eb53049799a241fb8de5ca515f41841c54 Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Mon, 5 Jun 2023 22:04:23 +0000 Subject: [PATCH 1/9] First working draft --- w1/data_processor.py | 23 ++++++++++++++++++++--- w1/main.py | 23 ++++++++++++++++++++--- w1/utils.py | 15 ++++++++++++--- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/w1/data_processor.py b/w1/data_processor.py index b6d0bfd..a25a229 100644 --- a/w1/data_processor.py +++ b/w1/data_processor.py @@ -66,7 +66,8 @@ def aggregate(self, column_name: str) -> float: Input : List[str] Output : Dict - This method should use the generator method assigned to seld.data_reader and return aggregate + This method should use the generator method assigned to + self.data_reader and return aggregate of the column mentioned in the `column_name` variable For example if the `column_name` -> 'TotalPrice' and the file format is as below: @@ -78,6 +79,22 @@ def aggregate(self, column_name: str) -> float: aggregate should be 105.58 """ - ######################################## YOUR CODE HERE ################################################## - ######################################## YOUR CODE HERE ################################################## + # get the value at column name in the current row + # iterate over the entire file + # return the total as a float + + # get generator from data_reader + data_reader_gen = (row for row in self.data_reader) + + # skip first row as it is the column name + _ = next(data_reader_gen) + + total = float(0) + # add to the total as we iterate through the file + for row in tqdm(data_reader_gen): + for column in self._col_names: + if column == column_name: + total += float(row[column_name]) + + return total diff --git a/w1/main.py b/w1/main.py index 8d7dbfa..c99c19c 100644 --- a/w1/main.py +++ b/w1/main.py @@ -18,7 +18,7 @@ def revenue_per_region(dp: DataProcessor) -> Dict: Input : object of instance type Class DataProcessor Output : Dict - The method should find the 
aggregate revenue per region + The method should find the aggregate revenue per country For example if the file format is as below: @@ -43,10 +43,27 @@ def revenue_per_region(dp: DataProcessor) -> Dict: 'United States': 121.499 } """ - ######################################## YOUR CODE HERE ################################################## - ######################################## YOUR CODE HERE ################################################## + # file is open and pass to data processor. + # Need to aggregate totalprice, grouping by country + # + # get generator from data_reader + data_reader_gen = (row for row in dp.data_reader) + + # skip first row as it is the column name + _ = next(data_reader_gen) + + # kickoff the new dictionary + country_revenue = {} + + # update stats as we iterate through the file + for row in tqdm(data_reader_gen): + if row['Country'] in country_revenue: + country_revenue[row['Country']] += float(row['TotalPrice']) + else: + country_revenue[row['Country']] = float(row['TotalPrice']) + return country_revenue def get_sales_information(file_path: str) -> Dict: # Initialize diff --git a/w1/utils.py b/w1/utils.py index f9bccf4..d338b45 100644 --- a/w1/utils.py +++ b/w1/utils.py @@ -85,9 +85,9 @@ def update_stats(self, val) -> None: class DataReader: def __init__(self, fp: str, sep: str, col_names: List) -> None: - self._fp = fp - self._sep = sep - self._col_names = col_names + self._fp = fp #filepath + self._sep = sep #delimiter? + self._col_names = col_names # column names def __iter__(self) -> Generator: """ @@ -111,6 +111,15 @@ def __iter__(self) -> Generator: 'Country': 'Russia', } """ + + for row in open(self._fp, 'r'): + row = [ x.strip() for x in row.split(',')] + + datarow = {} + for x in range(len(self._col_names)): + datarow[self._col_names[x]] = row[x] + yield datarow + # file has already been defined. Generator function, read a row, create a dictionary, return it. 
######################################## YOUR CODE HERE ################################################## ######################################## YOUR CODE HERE ################################################## From 79020e996d6f6a1473d8ac0bfd09fbc6ca316517 Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Mon, 5 Jun 2023 22:19:28 +0000 Subject: [PATCH 2/9] Added comprehensions --- w1/data_processor.py | 3 ++- w1/main.py | 5 +++-- w1/utils.py | 14 ++++++-------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/w1/data_processor.py b/w1/data_processor.py index a25a229..0cb2970 100644 --- a/w1/data_processor.py +++ b/w1/data_processor.py @@ -79,7 +79,7 @@ def aggregate(self, column_name: str) -> float: aggregate should be 105.58 """ - + ######################################## YOUR CODE HERE ################################################## # get the value at column name in the current row # iterate over the entire file # return the total as a float @@ -98,3 +98,4 @@ def aggregate(self, column_name: str) -> float: total += float(row[column_name]) return total + ######################################## YOUR CODE HERE ################################################## \ No newline at end of file diff --git a/w1/main.py b/w1/main.py index c99c19c..4be06f8 100644 --- a/w1/main.py +++ b/w1/main.py @@ -43,7 +43,7 @@ def revenue_per_region(dp: DataProcessor) -> Dict: 'United States': 121.499 } """ - + ######################################## YOUR CODE HERE ################################################## # file is open and pass to data processor. 
# Need to aggregate totalprice, grouping by country # @@ -64,7 +64,8 @@ def revenue_per_region(dp: DataProcessor) -> Dict: else: country_revenue[row['Country']] = float(row['TotalPrice']) return country_revenue - + ######################################## YOUR CODE HERE ################################################## + def get_sales_information(file_path: str) -> Dict: # Initialize dp = DataProcessor(file_path=file_path) diff --git a/w1/utils.py b/w1/utils.py index d338b45..73f61da 100644 --- a/w1/utils.py +++ b/w1/utils.py @@ -111,17 +111,15 @@ def __iter__(self) -> Generator: 'Country': 'Russia', } """ - + ######################################## YOUR CODE HERE ################################################## for row in open(self._fp, 'r'): row = [ x.strip() for x in row.split(',')] - - datarow = {} - for x in range(len(self._col_names)): - datarow[self._col_names[x]] = row[x] - yield datarow - # file has already been defined. Generator function, read a row, create a dictionary, return it. 
- ######################################## YOUR CODE HERE ################################################## + #dictionary comprehension + datarow = { self._col_names[x]:row[x] for x in range(len(self._col_names))} + #for x in range(len(self._col_names)): + # datarow[self._col_names[x]] = row[x] + yield datarow ######################################## YOUR CODE HERE ################################################## def get_file_path(self): From 89635b017074de30de12e0c51500a698deee986f Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Mon, 5 Jun 2023 22:42:53 +0000 Subject: [PATCH 3/9] Removed unecessary column iteration --- w1/data_processor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/w1/data_processor.py b/w1/data_processor.py index 0cb2970..4e6b905 100644 --- a/w1/data_processor.py +++ b/w1/data_processor.py @@ -93,9 +93,7 @@ def aggregate(self, column_name: str) -> float: total = float(0) # add to the total as we iterate through the file for row in tqdm(data_reader_gen): - for column in self._col_names: - if column == column_name: - total += float(row[column_name]) + total += float(row[column_name]) return total ######################################## YOUR CODE HERE ################################################## \ No newline at end of file From f2c83bd71cbb4b6cbaec9fc30270a8921a29d66b Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Mon, 5 Jun 2023 22:44:39 +0000 Subject: [PATCH 4/9] Replaced static with proper separator --- w1/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/w1/utils.py b/w1/utils.py index 73f61da..ce3311b 100644 --- a/w1/utils.py +++ b/w1/utils.py @@ -113,7 +113,7 @@ def __iter__(self) -> Generator: """ ######################################## YOUR CODE HERE ################################################## for row in open(self._fp, 'r'): - row = [ x.strip() for x in row.split(',')] + row = [ x.strip() for x in row.split(sep)] #dictionary comprehension datarow = { 
self._col_names[x]:row[x] for x in range(len(self._col_names))} From 6d13c4d89a03876c54898cc1d7ac3a3cfedb60af Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Tue, 6 Jun 2023 06:02:44 +0000 Subject: [PATCH 5/9] First draft of week 2 --- w2/server.py | 16 ++++++++++++++-- w2/utils/database.py | 21 ++++++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/w2/server.py b/w2/server.py index 9ee44a0..205bef7 100644 --- a/w2/server.py +++ b/w2/server.py @@ -42,7 +42,7 @@ async def get() -> Dict: """ ######################################## YOUR CODE HERE ################################################## - + return {"status": "ok"} ######################################## YOUR CODE HERE ################################################## @@ -53,7 +53,10 @@ async def get() -> HTMLResponse: should render the HTML file - index.html when a user goes to http://127.0.0.1:8000/ """ ######################################## YOUR CODE HERE ################################################## - + html_file = open('index.html', 'r') + html_content = html_file.read() + html_file.close() + return HTMLResponse(content=html_content, status_code=200) ######################################## YOUR CODE HERE ################################################## @@ -64,5 +67,14 @@ async def get() -> List[ProcessStatus]: Get all the records from the process table and return it using the pydantic model ProcessStatus """ ######################################## YOUR CODE HERE ################################################## + process_list = [] + db = DB() + db_results = db.read_all() #returns a dict with all the elements + + for row in db_results: + proc_item = ProcessStatus(process_id=row['process_id'], file_name=row['file_name'], file_path=row['file_path'], description=row['description'] , start_time=row['start_time'], end_time=row['end_time'], percentage=row['percentage']) + process_list.append(proc_item) + + return process_list 
######################################## YOUR CODE HERE ################################################## diff --git a/w2/utils/database.py b/w2/utils/database.py index 86af309..7e36852 100644 --- a/w2/utils/database.py +++ b/w2/utils/database.py @@ -45,7 +45,12 @@ def create_table(self) -> None: Read more about datatypes in Sqlite here -> https://www.sqlite.org/datatype3.html """ ######################################## YOUR CODE HERE ################################################## - + cursor = self._connection.cursor() + cursor.execute('CREATE TABLE if not exists ' + self._table_name + ''' + (process_id TEXT NOT NULL, file_name TEXT , file_path TEXT, description TEXT, start_time TEXT NOT NULL, end_time TEXT, percentage REAL) + ''' + ) + self._connection.commit() ######################################## YOUR CODE HERE ################################################## def insert(self, process_id, start_time, file_name=None, file_path=None, @@ -63,7 +68,14 @@ def insert(self, process_id, start_time, file_name=None, file_path=None, :return: None """ ######################################## YOUR CODE HERE ################################################## - + cursor = self._connection.cursor() + columns = ','.join(str(item) for item in self._col_order) + #'process_id', 'file_name', 'file_path', 'description', 'start_time', 'end_time', 'percentage + sqlstring = 'INSERT INTO ' + self._table_name + ' (' + columns + ') VALUES (?,?,?,?,?,?,?)' + data_params = (process_id, file_name, file_path, description, start_time, end_time, percentage) + #print(' SQL STRING ' + sqlstring) + cursor.execute(sqlstring, data_params) + self._connection.commit() ######################################## YOUR CODE HERE ################################################## def read_all(self) -> List[Dict]: @@ -95,7 +107,10 @@ def update_percentage(self, process_id, percentage): :return: None """ ######################################## YOUR CODE HERE 
################################################## - + cursor = self._connection.cursor() + #values = [value for key,value in locals() if key != self] + cursor.execute('UPDATE ' + self._table_name + ' SET percentage = ' + str(percentage) + ' WHERE process_id = ' + "'" + process_id + "'" ) + self._connection.commit() ######################################## YOUR CODE HERE ################################################## From e4329da13485e0a5767c1b6da1634060e6ec1774 Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Tue, 6 Jun 2023 06:03:00 +0000 Subject: [PATCH 6/9] fixed sep reference --- w1/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/w1/utils.py b/w1/utils.py index ce3311b..e010750 100644 --- a/w1/utils.py +++ b/w1/utils.py @@ -113,7 +113,7 @@ def __iter__(self) -> Generator: """ ######################################## YOUR CODE HERE ################################################## for row in open(self._fp, 'r'): - row = [ x.strip() for x in row.split(sep)] + row = [ x.strip() for x in row.split(self._sep)] #dictionary comprehension datarow = { self._col_names[x]:row[x] for x in range(len(self._col_names))} From a30b2e7204ebc85a187ff778d9733a169b2c3f67 Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Wed, 7 Jun 2023 01:49:30 +0000 Subject: [PATCH 7/9] W3 assignment --- w3/main.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/w3/main.py b/w3/main.py index 1e53962..384a326 100644 --- a/w3/main.py +++ b/w3/main.py @@ -62,7 +62,7 @@ def get_sales_information(file_path: str) -> Dict: # batches the files based on the number of processes def batch_files(file_paths: List[str], n_processes: int) -> List[set]: - if n_processes > len(file_paths): + if n_processes > len(file_paths): # if there are more processes than filepaths, then exit - 1 file per process return [] n_per_batch = len(file_paths) // n_processes @@ -164,14 +164,22 @@ def main() -> List[Dict]: batches = batch_files(file_paths=file_paths, 
n_processes=n_processes) ######################################## YOUR CODE HERE ################################################## - + with multiprocessing.Pool(processes=n_processes) as pool: + params = [] + for i in range(len(batches)): + params.append((batches[i],i)) + + results = pool.starmap(run, params) + pool.close() + pool.join() ######################################## YOUR CODE HERE ################################################## en = time.time() print("Overall time taken : {}".format(en-st)) # should return revenue data - return [{}] + #return [{}] + return results if __name__ == '__main__': From b7645953425df244297e1a3843f6c3cdff9ca6ae Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Wed, 7 Jun 2023 21:06:29 +0000 Subject: [PATCH 8/9] Week 4 --- w4/logger_config.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/w4/logger_config.py b/w4/logger_config.py index d305501..778cf21 100644 --- a/w4/logger_config.py +++ b/w4/logger_config.py @@ -20,30 +20,39 @@ def __init__(self, log_file_name: str, module_name: str): # Create formatters and add it to handlers ######################################## YOUR CODE HERE ################################################## # set the logging formatter to the f_handler + self.formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + self.f_handler.setFormatter(self.formatter) ######################################## YOUR CODE HERE ################################################## ######################################## YOUR CODE HERE ################################################## # Add handlers to the logger and setlevel to DEBUG + self.logger.addHandler(self.f_handler) + self.logger.setLevel(logging.DEBUG) ######################################## YOUR CODE HERE ################################################## def warning(self, msg): pass ######################################## YOUR CODE HERE ################################################## + 
self.logger.warning(msg) ######################################## YOUR CODE HERE ################################################## def error(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.error(msg) ######################################## YOUR CODE HERE ################################################## def info(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.info(msg) ######################################## YOUR CODE HERE ################################################## def debug(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.debug(msg) ######################################## YOUR CODE HERE ################################################## From 9dba5d8a762ebb6781b96dd78dbe835187840823 Mon Sep 17 00:00:00 2001 From: smikawa-ucsc Date: Wed, 7 Jun 2023 21:18:27 +0000 Subject: [PATCH 9/9] Added comprehension --- w2/server.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/w2/server.py b/w2/server.py index 205bef7..60be6d6 100644 --- a/w2/server.py +++ b/w2/server.py @@ -71,10 +71,14 @@ async def get() -> List[ProcessStatus]: db = DB() db_results = db.read_all() #returns a dict with all the elements + ''' for row in db_results: proc_item = ProcessStatus(process_id=row['process_id'], file_name=row['file_name'], file_path=row['file_path'], description=row['description'] , start_time=row['start_time'], end_time=row['end_time'], percentage=row['percentage']) process_list.append(proc_item) - return process_list + ''' + + #comprehension answer instead + return [ProcessStatus(**process) for process in db_results] ######################################## YOUR CODE HERE ##################################################