From e1a2c8eb53049799a241fb8de5ca515f41841c54 Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Mon, 5 Jun 2023 22:04:23 +0000
Subject: [PATCH 1/9] First working draft

---
 w1/data_processor.py | 23 ++++++++++++++++++++---
 w1/main.py           | 23 ++++++++++++++++++++---
 w1/utils.py          | 15 ++++++++++++---
 3 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/w1/data_processor.py b/w1/data_processor.py
index b6d0bfd..a25a229 100644
--- a/w1/data_processor.py
+++ b/w1/data_processor.py
@@ -66,7 +66,8 @@ def aggregate(self, column_name: str) -> float:
         Input : List[str]
         Output : Dict
 
-        This method should use the generator method assigned to seld.data_reader and return aggregate
+        This method should use the generator method assigned to 
+        self.data_reader and return aggregate
         of the column mentioned in the `column_name` variable
 
         For example if the `column_name` -> 'TotalPrice' and the file format is as below:
@@ -78,6 +79,22 @@ def aggregate(self, column_name: str) -> float:
 
         aggregate should be 105.58
         """
-        ######################################## YOUR CODE HERE ##################################################
 
-        ######################################## YOUR CODE HERE ##################################################
+        # get the value at column name in the current row
+        # iterate over the entire file
+        # return the total as a float
+
+        # get generator from data_reader
+        data_reader_gen = (row for row in self.data_reader)
+
+        # skip first row as it is the column name
+        _ = next(data_reader_gen)
+
+        total = float(0)
+        # add to the total as we iterate through the file
+        for row in tqdm(data_reader_gen):
+            for column in self._col_names:
+                if column == column_name:
+                    total += float(row[column_name])
+
+        return total
diff --git a/w1/main.py b/w1/main.py
index 8d7dbfa..c99c19c 100644
--- a/w1/main.py
+++ b/w1/main.py
@@ -18,7 +18,7 @@ def revenue_per_region(dp: DataProcessor) -> Dict:
     Input : object of instance type Class DataProcessor
     Output : Dict
 
-    The method should find the aggregate revenue per region
+    The method should find the aggregate revenue per counry
 
     For example if the file format is as below:
 
@@ -43,10 +43,27 @@ def revenue_per_region(dp: DataProcessor) -> Dict:
         'United States': 121.499
     }
     """
-    ######################################## YOUR CODE HERE ##################################################
 
-    ######################################## YOUR CODE HERE ##################################################
+    # file is open and pass to data processor.
+    # Need to aggregate totalprice, grouping by country
+    # 
 
+    # get generator from data_reader
+    data_reader_gen = (row for row in dp.data_reader)
+
+    # skip first row as it is the column name
+    _ = next(data_reader_gen)
+
+    # kickoff the new dictionary
+    country_revenue = {}
+
+    # update stats as we iterate through the file
+    for row in tqdm(data_reader_gen):
+        if row['Country'] in country_revenue:
+            country_revenue[row['Country']] += float(row['TotalPrice'])
+        else:
+            country_revenue[row['Country']] = float(row['TotalPrice'])
+    return country_revenue    
 
 def get_sales_information(file_path: str) -> Dict:
     # Initialize
diff --git a/w1/utils.py b/w1/utils.py
index f9bccf4..d338b45 100644
--- a/w1/utils.py
+++ b/w1/utils.py
@@ -85,9 +85,9 @@ def update_stats(self, val) -> None:
 
 class DataReader:
     def __init__(self, fp: str, sep: str, col_names: List) -> None:
-        self._fp = fp
-        self._sep = sep
-        self._col_names = col_names
+        self._fp = fp #filepath
+        self._sep = sep #delimiter?
+        self._col_names = col_names # column names
 
     def __iter__(self) -> Generator:
         """
@@ -111,6 +111,15 @@ def __iter__(self) -> Generator:
             'Country': 'Russia',
         }
         """
+
+        for row in open(self._fp, 'r'):
+            row = [ x.strip() for x in row.split(',')]
+        
+            datarow = {}
+            for x in range(len(self._col_names)):
+                datarow[self._col_names[x]] = row[x]
+            yield datarow
+        # file has already been defined.  Generator function, read a row, create a dictionary, return it.
     ######################################## YOUR CODE HERE ##################################################
 
     ######################################## YOUR CODE HERE ##################################################

From 79020e996d6f6a1473d8ac0bfd09fbc6ca316517 Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Mon, 5 Jun 2023 22:19:28 +0000
Subject: [PATCH 2/9] Added comprehensions

---
 w1/data_processor.py |  3 ++-
 w1/main.py           |  5 +++--
 w1/utils.py          | 14 ++++++--------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/w1/data_processor.py b/w1/data_processor.py
index a25a229..0cb2970 100644
--- a/w1/data_processor.py
+++ b/w1/data_processor.py
@@ -79,7 +79,7 @@ def aggregate(self, column_name: str) -> float:
 
         aggregate should be 105.58
         """
-
+        ######################################## YOUR CODE HERE ##################################################
         # get the value at column name in the current row
         # iterate over the entire file
         # return the total as a float
@@ -98,3 +98,4 @@ def aggregate(self, column_name: str) -> float:
                     total += float(row[column_name])
 
         return total
+        ######################################## YOUR CODE HERE ##################################################
\ No newline at end of file
diff --git a/w1/main.py b/w1/main.py
index c99c19c..4be06f8 100644
--- a/w1/main.py
+++ b/w1/main.py
@@ -43,7 +43,7 @@ def revenue_per_region(dp: DataProcessor) -> Dict:
         'United States': 121.499
     }
     """
-
+    ######################################## YOUR CODE HERE ##################################################
     # file is open and pass to data processor.
     # Need to aggregate totalprice, grouping by country
     # 
@@ -64,7 +64,8 @@ def revenue_per_region(dp: DataProcessor) -> Dict:
         else:
             country_revenue[row['Country']] = float(row['TotalPrice'])
     return country_revenue    
-
+    ######################################## YOUR CODE HERE ##################################################
+    
 def get_sales_information(file_path: str) -> Dict:
     # Initialize
     dp = DataProcessor(file_path=file_path)
diff --git a/w1/utils.py b/w1/utils.py
index d338b45..73f61da 100644
--- a/w1/utils.py
+++ b/w1/utils.py
@@ -111,17 +111,15 @@ def __iter__(self) -> Generator:
             'Country': 'Russia',
         }
         """
-
+    ######################################## YOUR CODE HERE ##################################################
         for row in open(self._fp, 'r'):
             row = [ x.strip() for x in row.split(',')]
-        
-            datarow = {}
-            for x in range(len(self._col_names)):
-                datarow[self._col_names[x]] = row[x]
-            yield datarow
-        # file has already been defined.  Generator function, read a row, create a dictionary, return it.
-    ######################################## YOUR CODE HERE ##################################################
 
+            #dictionary comprehension
+            datarow = { self._col_names[x]:row[x] for x in range(len(self._col_names))}
+            #for x in range(len(self._col_names)):
+            #    datarow[self._col_names[x]] = row[x]
+            yield datarow
     ######################################## YOUR CODE HERE ##################################################
 
     def get_file_path(self):

From 89635b017074de30de12e0c51500a698deee986f Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Mon, 5 Jun 2023 22:42:53 +0000
Subject: [PATCH 3/9] Removed unecessary column iteration

---
 w1/data_processor.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/w1/data_processor.py b/w1/data_processor.py
index 0cb2970..4e6b905 100644
--- a/w1/data_processor.py
+++ b/w1/data_processor.py
@@ -93,9 +93,7 @@ def aggregate(self, column_name: str) -> float:
         total = float(0)
         # add to the total as we iterate through the file
         for row in tqdm(data_reader_gen):
-            for column in self._col_names:
-                if column == column_name:
-                    total += float(row[column_name])
+            total += float(row[column_name])
 
         return total
         ######################################## YOUR CODE HERE ##################################################
\ No newline at end of file

From f2c83bd71cbb4b6cbaec9fc30270a8921a29d66b Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Mon, 5 Jun 2023 22:44:39 +0000
Subject: [PATCH 4/9] Replaced static with proper separator

---
 w1/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/w1/utils.py b/w1/utils.py
index 73f61da..ce3311b 100644
--- a/w1/utils.py
+++ b/w1/utils.py
@@ -113,7 +113,7 @@ def __iter__(self) -> Generator:
         """
     ######################################## YOUR CODE HERE ##################################################
         for row in open(self._fp, 'r'):
-            row = [ x.strip() for x in row.split(',')]
+            row = [ x.strip() for x in row.split(sep)]
 
             #dictionary comprehension
             datarow = { self._col_names[x]:row[x] for x in range(len(self._col_names))}

From 6d13c4d89a03876c54898cc1d7ac3a3cfedb60af Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Tue, 6 Jun 2023 06:02:44 +0000
Subject: [PATCH 5/9] First draft of week 2

---
 w2/server.py         | 16 ++++++++++++++--
 w2/utils/database.py | 21 ++++++++++++++++++---
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/w2/server.py b/w2/server.py
index 9ee44a0..205bef7 100644
--- a/w2/server.py
+++ b/w2/server.py
@@ -42,7 +42,7 @@ async def get() -> Dict:
     """
 
     ######################################## YOUR CODE HERE ##################################################
-
+    return {"status": "ok"}
     ######################################## YOUR CODE HERE ##################################################
 
 
@@ -53,7 +53,10 @@ async def get() -> HTMLResponse:
     should render the HTML file - index.html when a user goes to http://127.0.0.1:8000/
     """
     ######################################## YOUR CODE HERE ##################################################
-
+    html_file = open('index.html', 'r')
+    html_content = html_file.read()
+    html_file.close()
+    return HTMLResponse(content=html_content, status_code=200)
     ######################################## YOUR CODE HERE ##################################################
 
 
@@ -64,5 +67,14 @@ async def get() -> List[ProcessStatus]:
     Get all the records from the process table and return it using the pydantic model ProcessStatus
     """
     ######################################## YOUR CODE HERE ##################################################
+    process_list = []
+    db = DB()
+    db_results = db.read_all() #returns a dict with all the elements
+
+    for row in db_results:
+        proc_item = ProcessStatus(process_id=row['process_id'], file_name=row['file_name'], file_path=row['file_path'], description=row['description'] , start_time=row['start_time'], end_time=row['end_time'], percentage=row['percentage'])
+        process_list.append(proc_item)
+
+    return process_list
 
     ######################################## YOUR CODE HERE ##################################################
diff --git a/w2/utils/database.py b/w2/utils/database.py
index 86af309..7e36852 100644
--- a/w2/utils/database.py
+++ b/w2/utils/database.py
@@ -45,7 +45,12 @@ def create_table(self) -> None:
         Read more about datatypes in Sqlite here -> https://www.sqlite.org/datatype3.html
         """
     ######################################## YOUR CODE HERE ##################################################
-
+        cursor = self._connection.cursor()
+        cursor.execute('CREATE TABLE if not exists ' + self._table_name + '''
+            (process_id TEXT NOT NULL, file_name TEXT , file_path TEXT, description TEXT, start_time TEXT NOT NULL, end_time TEXT, percentage REAL)
+        '''
+                )
+        self._connection.commit()
     ######################################## YOUR CODE HERE ##################################################
 
     def insert(self, process_id, start_time, file_name=None, file_path=None,
@@ -63,7 +68,14 @@ def insert(self, process_id, start_time, file_name=None, file_path=None,
         :return: None
         """
     ######################################## YOUR CODE HERE ##################################################
-
+        cursor = self._connection.cursor()
+        columns = ','.join(str(item) for item in self._col_order)
+        #'process_id', 'file_name', 'file_path', 'description', 'start_time', 'end_time', 'percentage
+        sqlstring = 'INSERT INTO ' + self._table_name + ' (' + columns + ') VALUES (?,?,?,?,?,?,?)'
+        data_params = (process_id, file_name, file_path, description, start_time, end_time, percentage)
+        #print(' SQL STRING ' + sqlstring)
+        cursor.execute(sqlstring, data_params)
+        self._connection.commit()
     ######################################## YOUR CODE HERE ##################################################
 
     def read_all(self) -> List[Dict]:
@@ -95,7 +107,10 @@ def update_percentage(self, process_id, percentage):
         :return: None
         """
     ######################################## YOUR CODE HERE ##################################################
-
+        cursor = self._connection.cursor()
+        #values = [value for key,value in locals() if key != self]
+        cursor.execute('UPDATE ' + self._table_name + ' SET percentage = ' + str(percentage) + ' WHERE process_id = ' + "'" + process_id + "'" )
+        self._connection.commit()
     ######################################## YOUR CODE HERE ##################################################
 
 

From e4329da13485e0a5767c1b6da1634060e6ec1774 Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Tue, 6 Jun 2023 06:03:00 +0000
Subject: [PATCH 6/9] fixed sep reference

---
 w1/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/w1/utils.py b/w1/utils.py
index ce3311b..e010750 100644
--- a/w1/utils.py
+++ b/w1/utils.py
@@ -113,7 +113,7 @@ def __iter__(self) -> Generator:
         """
     ######################################## YOUR CODE HERE ##################################################
         for row in open(self._fp, 'r'):
-            row = [ x.strip() for x in row.split(sep)]
+            row = [ x.strip() for x in row.split(self._sep)]
 
             #dictionary comprehension
             datarow = { self._col_names[x]:row[x] for x in range(len(self._col_names))}

From a30b2e7204ebc85a187ff778d9733a169b2c3f67 Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Wed, 7 Jun 2023 01:49:30 +0000
Subject: [PATCH 7/9] W3 assignment

---
 w3/main.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/w3/main.py b/w3/main.py
index 1e53962..384a326 100644
--- a/w3/main.py
+++ b/w3/main.py
@@ -62,7 +62,7 @@ def get_sales_information(file_path: str) -> Dict:
 
 # batches the files based on the number of processes
 def batch_files(file_paths: List[str], n_processes: int) -> List[set]:
-    if n_processes > len(file_paths):
+    if n_processes > len(file_paths): # if there are more processes than filepaths, then exit - 1 file per process
         return []
 
     n_per_batch = len(file_paths) // n_processes
@@ -164,14 +164,22 @@ def main() -> List[Dict]:
     batches = batch_files(file_paths=file_paths, n_processes=n_processes)
 
     ######################################## YOUR CODE HERE ##################################################
-
+    with multiprocessing.Pool(processes=n_processes) as pool:
+        params = []
+        for i in range(len(batches)):
+            params.append((batches[i],i))
+
+        results = pool.starmap(run, params)
+        pool.close()
+        pool.join()
     ######################################## YOUR CODE HERE ##################################################
 
     en = time.time()
     print("Overall time taken : {}".format(en-st))
 
     # should return revenue data
-    return [{}]
+    #return [{}]
+    return results
 
 
 if __name__ == '__main__':

From b7645953425df244297e1a3843f6c3cdff9ca6ae Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Wed, 7 Jun 2023 21:06:29 +0000
Subject: [PATCH 8/9] Week 4

---
 w4/logger_config.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/w4/logger_config.py b/w4/logger_config.py
index d305501..778cf21 100644
--- a/w4/logger_config.py
+++ b/w4/logger_config.py
@@ -20,30 +20,39 @@ def __init__(self, log_file_name: str, module_name: str):
         # Create formatters and add it to handlers
         ######################################## YOUR CODE HERE ##################################################
         # set the logging formatter to the f_handler
+        self.formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+        self.f_handler.setFormatter(self.formatter)
         ######################################## YOUR CODE HERE ##################################################
 
         ######################################## YOUR CODE HERE ##################################################
         # Add handlers to the logger and setlevel to DEBUG
+        self.logger.addHandler(self.f_handler)
+        self.logger.setLevel(logging.DEBUG)
         ######################################## YOUR CODE HERE ##################################################
 
     def warning(self, msg):
         pass
         ######################################## YOUR CODE HERE ##################################################
+        self.logger.warning(msg)
         ######################################## YOUR CODE HERE ##################################################
 
     def error(self, msg):
         pass
         ######################################## YOUR CODE HERE ##################################################
+        self.logger.error(msg)
         ######################################## YOUR CODE HERE ##################################################
 
     def info(self, msg):
         pass
         ######################################## YOUR CODE HERE ##################################################
+        self.logger.info(msg)
         ######################################## YOUR CODE HERE ##################################################
 
     def debug(self, msg):
         pass
         ######################################## YOUR CODE HERE ##################################################
+        self.logger.debug(msg)
         ######################################## YOUR CODE HERE ##################################################
 
 

From 9dba5d8a762ebb6781b96dd78dbe835187840823 Mon Sep 17 00:00:00 2001
From: smikawa-ucsc <smikawa@ucsc.edu>
Date: Wed, 7 Jun 2023 21:18:27 +0000
Subject: [PATCH 9/9] Added comprehension

---
 w2/server.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/w2/server.py b/w2/server.py
index 205bef7..60be6d6 100644
--- a/w2/server.py
+++ b/w2/server.py
@@ -71,10 +71,14 @@ async def get() -> List[ProcessStatus]:
     db = DB()
     db_results = db.read_all() #returns a dict with all the elements
 
+    '''
     for row in db_results:
         proc_item = ProcessStatus(process_id=row['process_id'], file_name=row['file_name'], file_path=row['file_path'], description=row['description'] , start_time=row['start_time'], end_time=row['end_time'], percentage=row['percentage'])
         process_list.append(proc_item)
-
     return process_list
+    '''
+
+    #comprehension answer instead
+    return [ProcessStatus(**process) for process in db_results]
 
     ######################################## YOUR CODE HERE ##################################################