diff --git a/w1/data_processor.py b/w1/data_processor.py index b6d0bfd..4e6b905 100644 --- a/w1/data_processor.py +++ b/w1/data_processor.py @@ -66,7 +66,8 @@ def aggregate(self, column_name: str) -> float: Input : List[str] Output : Dict - This method should use the generator method assigned to seld.data_reader and return aggregate + This method should use the generator method assigned to + self.data_reader and return aggregate of the column mentioned in the `column_name` variable For example if the `column_name` -> 'TotalPrice' and the file format is as below: @@ -79,5 +80,20 @@ def aggregate(self, column_name: str) -> float: aggregate should be 105.58 """ ######################################## YOUR CODE HERE ################################################## + # get the value at column name in the current row + # iterate over the entire file + # return the total as a float - ######################################## YOUR CODE HERE ################################################## + # get generator from data_reader + data_reader_gen = (row for row in self.data_reader) + + # skip first row as it is the column name + _ = next(data_reader_gen) + + total = float(0) + # add to the total as we iterate through the file + for row in tqdm(data_reader_gen): + total += float(row[column_name]) + + return total + ######################################## YOUR CODE HERE ################################################## \ No newline at end of file diff --git a/w1/main.py b/w1/main.py index 8d7dbfa..4be06f8 100644 --- a/w1/main.py +++ b/w1/main.py @@ -18,7 +18,7 @@ def revenue_per_region(dp: DataProcessor) -> Dict: Input : object of instance type Class DataProcessor Output : Dict - The method should find the aggregate revenue per region + The method should find the aggregate revenue per counry For example if the file format is as below: @@ -44,10 +44,28 @@ def revenue_per_region(dp: DataProcessor) -> Dict: } """ ######################################## YOUR CODE HERE ################################################## - + # file is open and pass to data processor. + # Need to aggregate totalprice, grouping by country + # + + # get generator from data_reader + data_reader_gen = (row for row in dp.data_reader) + + # skip first row as it is the column name + _ = next(data_reader_gen) + + # kickoff the new dictionary + country_revenue = {} + + # update stats as we iterate through the file + for row in tqdm(data_reader_gen): + if row['Country'] in country_revenue: + country_revenue[row['Country']] += float(row['TotalPrice']) + else: + country_revenue[row['Country']] = float(row['TotalPrice']) + return country_revenue ######################################## YOUR CODE HERE ################################################## - - + def get_sales_information(file_path: str) -> Dict: # Initialize dp = DataProcessor(file_path=file_path) diff --git a/w1/utils.py b/w1/utils.py index f9bccf4..e010750 100644 --- a/w1/utils.py +++ b/w1/utils.py @@ -85,9 +85,9 @@ def update_stats(self, val) -> None: class DataReader: def __init__(self, fp: str, sep: str, col_names: List) -> None: - self._fp = fp - self._sep = sep - self._col_names = col_names + self._fp = fp #filepath + self._sep = sep #delimiter? + self._col_names = col_names # column names def __iter__(self) -> Generator: """ @@ -112,7 +112,14 @@ def __iter__(self) -> Generator: } """ ######################################## YOUR CODE HERE ################################################## - + for row in open(self._fp, 'r'): + row = [ x.strip() for x in row.split(self._sep)] + + #dictionary comprehension + datarow = { self._col_names[x]:row[x] for x in range(len(self._col_names))} + #for x in range(len(self._col_names)): + # datarow[self._col_names[x]] = row[x] + yield datarow ######################################## YOUR CODE HERE ################################################## def get_file_path(self): diff --git a/w2/server.py b/w2/server.py index 9ee44a0..60be6d6 100644 --- a/w2/server.py +++ b/w2/server.py @@ -42,7 +42,7 @@ async def get() -> Dict: """ ######################################## YOUR CODE HERE ################################################## - + return {"status": "ok"} ######################################## YOUR CODE HERE ################################################## @@ -53,7 +53,10 @@ async def get() -> HTMLResponse: should render the HTML file - index.html when a user goes to http://127.0.0.1:8000/ """ ######################################## YOUR CODE HERE ################################################## - + html_file = open('index.html', 'r') + html_content = html_file.read() + html_file.close() + return HTMLResponse(content=html_content, status_code=200) ######################################## YOUR CODE HERE ################################################## @@ -64,5 +67,18 @@ async def get() -> List[ProcessStatus]: Get all the records from the process table and return it using the pydantic model ProcessStatus """ ######################################## YOUR CODE HERE ################################################## + process_list = [] + db = DB() + db_results = db.read_all() #returns a dict with all the elements + + ''' + for row in db_results: + proc_item = ProcessStatus(process_id=row['process_id'], file_name=row['file_name'], file_path=row['file_path'], description=row['description'] , start_time=row['start_time'], end_time=row['end_time'], percentage=row['percentage']) + process_list.append(proc_item) + return process_list + ''' + + #comprehension answer instead + return [ProcessStatus(**process) for process in db_results] ######################################## YOUR CODE HERE ################################################## diff --git a/w2/utils/database.py b/w2/utils/database.py index 86af309..7e36852 100644 --- a/w2/utils/database.py +++ b/w2/utils/database.py @@ -45,7 +45,12 @@ def create_table(self) -> None: Read more about datatypes in Sqlite here -> https://www.sqlite.org/datatype3.html """ ######################################## YOUR CODE HERE ################################################## - + cursor = self._connection.cursor() + cursor.execute('CREATE TABLE if not exists ' + self._table_name + ''' + (process_id TEXT NOT NULL, file_name TEXT , file_path TEXT, description TEXT, start_time TEXT NOT NULL, end_time TEXT, percentage REAL) + ''' + ) + self._connection.commit() ######################################## YOUR CODE HERE ################################################## def insert(self, process_id, start_time, file_name=None, file_path=None, @@ -63,7 +68,14 @@ def insert(self, process_id, start_time, file_name=None, file_path=None, :return: None """ ######################################## YOUR CODE HERE ################################################## - + cursor = self._connection.cursor() + columns = ','.join(str(item) for item in self._col_order) + #'process_id', 'file_name', 'file_path', 'description', 'start_time', 'end_time', 'percentage + sqlstring = 'INSERT INTO ' + self._table_name + ' (' + columns + ') VALUES (?,?,?,?,?,?,?)' + data_params = (process_id, file_name, file_path, description, start_time, end_time, percentage) + #print(' SQL STRING ' + sqlstring) + cursor.execute(sqlstring, data_params) + self._connection.commit() ######################################## YOUR CODE HERE ################################################## def read_all(self) -> List[Dict]: @@ -95,7 +107,10 @@ def update_percentage(self, process_id, percentage): :return: None """ ######################################## YOUR CODE HERE ################################################## - + cursor = self._connection.cursor() + #values = [value for key,value in locals() if key != self] + cursor.execute('UPDATE ' + self._table_name + ' SET percentage = ' + str(percentage) + ' WHERE process_id = ' + "'" + process_id + "'" ) + self._connection.commit() ######################################## YOUR CODE HERE ################################################## diff --git a/w3/main.py b/w3/main.py index 1e53962..384a326 100644 --- a/w3/main.py +++ b/w3/main.py @@ -62,7 +62,7 @@ def get_sales_information(file_path: str) -> Dict: # batches the files based on the number of processes def batch_files(file_paths: List[str], n_processes: int) -> List[set]: - if n_processes > len(file_paths): + if n_processes > len(file_paths): # if there are more processes than filepaths, then exit - 1 file per process return [] n_per_batch = len(file_paths) // n_processes @@ -164,14 +164,22 @@ def main() -> List[Dict]: batches = batch_files(file_paths=file_paths, n_processes=n_processes) ######################################## YOUR CODE HERE ################################################## - + with multiprocessing.Pool(processes=n_processes) as pool: + params = [] + for i in range(len(batches)): + params.append((batches[i],i)) + + results = pool.starmap(run, params) + pool.close() + pool.join() ######################################## YOUR CODE HERE ################################################## en = time.time() print("Overall time taken : {}".format(en-st)) # should return revenue data - return [{}] + #return [{}] + return results if __name__ == '__main__': diff --git a/w4/logger_config.py b/w4/logger_config.py index d305501..778cf21 100644 --- a/w4/logger_config.py +++ b/w4/logger_config.py @@ -20,30 +20,39 @@ def __init__(self, log_file_name: str, module_name: str): # Create formatters and add it to handlers ######################################## YOUR CODE HERE ################################################## # set the logging formatter to the f_handler + self.formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + self.f_handler.setFormatter(self.formatter) ######################################## YOUR CODE HERE ################################################## ######################################## YOUR CODE HERE ################################################## # Add handlers to the logger and setlevel to DEBUG + self.logger.addHandler(self.f_handler) + self.logger.setLevel(logging.DEBUG) ######################################## YOUR CODE HERE ################################################## def warning(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.warning(msg) ######################################## YOUR CODE HERE ################################################## def error(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.error(msg) ######################################## YOUR CODE HERE ################################################## def info(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.info(msg) ######################################## YOUR CODE HERE ################################################## def debug(self, msg): pass ######################################## YOUR CODE HERE ################################################## + self.logger.debug(msg) ######################################## YOUR CODE HERE ##################################################