From 250fe6c1befbe0e947839aa8f13f4791d86a2326 Mon Sep 17 00:00:00 2001
From: jakubmisterka <jakubwmisterka@gmail.com>
Date: Wed, 12 Mar 2025 15:25:59 +0100
Subject: [PATCH 1/2] solution

---
 calculate_largest_expensors.sql     | 17 +++++++
 create_employees.sql                | 24 +++++++++
 create_expenses.sql                 | 34 +++++++++++++
 create_invoices.sql                 | 53 ++++++++++++++++++++
 find_manager_cycles.sql             | 24 +++++++++
 generate_insert_query.py            | 75 +++++++++++++++++++++++++++++
 generate_supplier_payment_plans.sql | 43 +++++++++++++++++
 7 files changed, 270 insertions(+)
 create mode 100644 generate_insert_query.py

diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql
index e69de29..88b8906 100644
--- a/calculate_largest_expensors.sql
+++ b/calculate_largest_expensors.sql
@@ -0,0 +1,17 @@
+SELECT 
+     em.employee_id
+    ,em.first_name || ' ' || em.last_name AS employee_name
+    ,m.employee_id                        AS manager_id
+    ,m.first_name || ' ' || m.last_name   AS manager_name
+    ,ex.total_expensed_amount
+FROM (
+        SELECT 
+             employee_id
+            ,SUM(unit_price * quantity) AS total_expensed_amount 
+        FROM EXPENSES 
+        GROUP BY employee_id
+     ) ex
+LEFT JOIN EMPLOYEE em ON em.employee_id = ex.employee_id
+LEFT JOIN EMPLOYEE m  ON em.manager_id  = m.employee_id
+ORDER BY total_expensed_amount DESC
+;
\ No newline at end of file
diff --git a/create_employees.sql b/create_employees.sql
index e69de29..e1c8016 100644
--- a/create_employees.sql
+++ b/create_employees.sql
@@ -0,0 +1,24 @@
+CREATE TABLE IF NOT EXISTS EMPLOYEE (
+employee_id TINYINT,
+first_name  VARCHAR,
+last_name   VARCHAR,
+job_title   VARCHAR,
+manager_id  TINYINT
+)
+;
+
+TRUNCATE TABLE EMPLOYEE
+;
+
+INSERT INTO EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id) 
+VALUES
+(1,'Ian','James','CEO',4),
+(2,'Umberto','Torrielli','CSO',1),
+(3,'Alex','Jacobson','MD EMEA',2),
+(4,'Darren','Poynton','CFO',2),
+(5,'Tim','Beard','MD APAC',2),
+(6,'Gemma','Dodd','COS',1),
+(7,'Lisa','Platten','CHR',6),
+(8,'Stefano','Camisaca','GM Activation',2),
+(9,'Andrea','Ghibaudi','MD NAM',2)
+;
\ No newline at end of file
diff --git a/create_expenses.sql b/create_expenses.sql
index e69de29..930eaac 100644
--- a/create_expenses.sql
+++ b/create_expenses.sql
@@ -0,0 +1,34 @@
+CREATE TABLE IF NOT EXISTS EXPENSES (
+employee_id TINYINT,
+unit_price  DECIMAL(8, 2),
+quantity    TINYINT
+);
+
+CREATE TABLE IF NOT EXISTS EXPENSES_tmp (
+employee_name VARCHAR,
+unit_price    DECIMAL(8, 2),
+quantity      TINYINT
+);
+
+INSERT INTO EXPENSES_tmp (employee_name, unit_price, quantity)
+VALUES
+('Alex Jacobson',6.50,14),
+('Alex Jacobson',11.00,20),
+('Alex Jacobson',22.00,18),
+('Alex Jacobson',13.00,75),
+('Andrea Ghibaudi',300,1),
+('Darren Poynton',40.00,9),
+('Umberto Torrielli',17.50,4);
+
+TRUNCATE TABLE EXPENSES;
+
+INSERT INTO EXPENSES
+SELECT 
+     em.employee_id
+    ,ex.unit_price
+    ,ex.quantity    
+FROM EXPENSES_tmp ex
+INNER JOIN EMPLOYEE em ON ex.employee_name = em.first_name || ' ' || em.last_name
+;
+
+DROP TABLE EXPENSES_tmp;
\ No newline at end of file
diff --git a/create_invoices.sql b/create_invoices.sql
index e69de29..626827e 100644
--- a/create_invoices.sql
+++ b/create_invoices.sql
@@ -0,0 +1,53 @@
+CREATE TABLE IF NOT EXISTS SUPPLIER (
+supplier_id TINYINT,
+NAME        VARCHAR
+);
+
+CREATE TABLE IF NOT EXISTS INVOICE (
+supplier_id     TINYINT,
+invoice_ammount DECIMAL(8, 2),
+due_date        date
+);
+
+CREATE TABLE IF NOT EXISTS INVOICE_tmp (
+supplier        VARCHAR,
+invoice_ammount DECIMAL(8, 2),
+months_due      TINYINT
+);
+
+INSERT INTO INVOICE_tmp (supplier,invoice_ammount,months_due)
+VALUES
+('Party Animals',6000,3),
+('Catering Plus',2000,2),
+('Catering Plus',1500,3),
+('Dave''s Discos',500,1),
+('Entertainment tonight',6000,3),
+('Ice Ice Baby',4000,6);
+
+TRUNCATE TABLE INVOICE;
+TRUNCATE TABLE SUPPLIER;
+
+INSERT INTO SUPPLIER
+SELECT
+     ROW_NUMBER() OVER (ORDER BY name) AS supplier_id
+    ,name
+FROM
+    (SELECT
+        supplier     AS name
+    FROM INVOICE_tmp
+    GROUP BY supplier
+    ) a
+;
+
+
+INSERT INTO INVOICE
+SELECT
+     s.supplier_id
+    ,tmp.invoice_ammount
+    ,last_day_of_month(date_add('month', tmp.months_due-1, now())) AS due_date
+FROM INVOICE_tmp tmp
+LEFT JOIN SUPPLIER s ON s.name = tmp.supplier
+;
+
+DROP TABLE INVOICE_tmp;
+
diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql
index e69de29..8f8e6f7 100644
--- a/find_manager_cycles.sql
+++ b/find_manager_cycles.sql
@@ -0,0 +1,24 @@
+SELECT
+     employee_id
+    ,concat(array_except(manager_cycle, array[NULL]), array[employee_id]) manager_cycle
+FROM
+   (
+    SELECT 
+        e1.employee_id
+        ,array[e1.employee_id, e2.employee_id, e3.employee_id, e4.employee_id, e5.employee_id, e6.employee_id, e7.employee_id, e8.employee_id, e9.employee_id] manager_cycle
+
+    FROM EMPLOYEE e1
+    LEFT JOIN EMPLOYEE e2 on e1.manager_id = e2.employee_id AND e1.employee_id <> e2.employee_id -- each step follows the previous step's manager and stops matching once it is back at e1
+    LEFT JOIN EMPLOYEE e3 on e2.manager_id = e3.employee_id AND e1.employee_id <> e3.employee_id
+    LEFT JOIN EMPLOYEE e4 on e3.manager_id = e4.employee_id AND e1.employee_id <> e4.employee_id
+    LEFT JOIN EMPLOYEE e5 on e4.manager_id = e5.employee_id AND e1.employee_id <> e5.employee_id
+    LEFT JOIN EMPLOYEE e6 on e5.manager_id = e6.employee_id AND e1.employee_id <> e6.employee_id
+    LEFT JOIN EMPLOYEE e7 on e6.manager_id = e7.employee_id AND e1.employee_id <> e7.employee_id
+    LEFT JOIN EMPLOYEE e8 on e7.manager_id = e8.employee_id AND e1.employee_id <> e8.employee_id
+    LEFT JOIN EMPLOYEE e9 on e8.manager_id = e9.employee_id AND e1.employee_id <> e9.employee_id
+    LEFT JOIN EMPLOYEE e0 on e9.manager_id = e0.employee_id AND e1.employee_id <> e0.employee_id -- 10th step: must follow e9's manager (e8 here only duplicated the e9 join)
+    WHERE e0.employee_id is NULL -- keep only chains that stopped before the 10th step
+   )
+;
+
+
diff --git a/generate_insert_query.py b/generate_insert_query.py
new file mode 100644
index 0000000..8d28de7
--- /dev/null
+++ b/generate_insert_query.py
@@ -0,0 +1,75 @@
+import os              
+
+def check_column(msg, column_mapping, key, delimiter = ":"):
+    if delimiter in msg:
+        if msg[0:len(column_mapping[key])] == column_mapping[key]:
+            return msg[len(column_mapping[key])+2:].replace('\n', '')
+    return False
+
+def gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm):
+    wd = os.path.abspath(os.getcwd())
+    wd += file_path
+
+    sql_query  = f"""INSERT INTO {table_name} ({','.join(column_mapping.keys())}) 
+    VALUES"""
+    vals = []
+    for file in os.listdir(wd):
+        f = open(f"{wd}/{file}", "r")
+        val = {}
+        for line in f:
+            for k in column_mapping.keys(): 
+                if check_column(line, column_mapping, k):
+                    val[k] = check_column(line, column_mapping, k)
+        vals.append(val)
+        f.close()
+    print(vals)
+    for val in vals:
+        sql_query += f"""
+("""
+        for k in column_mapping.keys():
+            column_value = val[k]
+
+            if k in column_with_qm:
+                column_value = column_value.replace("'", "''")
+                sql_query += "'"
+
+            
+            if k in  column_trim:
+                column_value = val[k].split(" ")[0] 
+            sql_query += column_value 
+
+            if k in column_with_qm:
+                sql_query += "'"
+            
+            sql_query += ","
+
+        sql_query = sql_query[:-1]
+        sql_query += f"),"
+    sql_query = sql_query[:-1] + ';'
+    return sql_query
+
+
+
+file_path = "/finance/receipts_from_last_night"
+table_name = "EXPENSES_tmp"
+column_mapping = { "employee_name": "Employee"
+                  ,"unit_price": "Unit Price"
+                  ,"quantity": "Quantity"   
+                    } 
+column_trim = []
+column_with_qm = ["employee_name"] 
+
+print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm))
+
+
+
+file_path = "/finance/invoices_due"
+table_name = "INVOICE_tmp"
+column_mapping = { "supplier": "Company Name"
+                  ,"invoice_ammount": "Invoice Amount"
+                  ,"months_due": "Due Date"   
+                }
+column_trim = ["months_due"]
+column_with_qm = ["supplier"] 
+
+print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm))
diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql
index e69de29..4893729 100644
--- a/generate_supplier_payment_plans.sql
+++ b/generate_supplier_payment_plans.sql
@@ -0,0 +1,43 @@
+CREATE OR REPLACE VIEW payment_dates AS
+SELECT
+     supplier_id
+    ,payment_ammount
+    ,due_date
+    ,payment_date
+FROM (
+        SELECT 
+             supplier_id
+            ,invoice_ammount/cardinality(payment_dates) AS payment_ammount
+            ,due_date
+            ,payment_dates
+        FROM
+                (
+                    SELECT 
+                         supplier_id
+                        ,due_date
+                        ,invoice_ammount
+                        ,sequence(last_day_of_month(date(now())), due_date, interval '1' month) AS payment_dates
+                    FROM INVOICE
+                ) invoices_with_payment_days
+     ) AS invoices (supplier_id,payment_ammount,due_date,payment_dates)
+CROSS JOIN UNNEST(payment_dates) AS t(payment_date)
+;
+
+SELECT 
+     p.supplier_id
+    ,s.name AS supplier_name
+    ,p.payment_ammount
+    ,SUM(p.payment_ammount) OVER (PARTITION BY p.supplier_id ORDER BY p.payment_date DESC) - p.payment_ammount AS balance_outstanding
+    ,p.payment_date 
+    
+FROM (
+      SELECT 
+         supplier_id
+        ,payment_date
+        ,sum(payment_ammount) AS payment_ammount 
+      FROM  payment_dates 
+      GROUP BY supplier_id, payment_date
+      ) p
+LEFT JOIN SUPPLIER s ON p.supplier_id = s.supplier_id
+ORDER BY p.supplier_id, p.payment_date
+;
\ No newline at end of file

From b09a4de83e3c46bcf1b81a891489ec421ee6c0d8 Mon Sep 17 00:00:00 2001
From: jakubmisterka <jakubwmisterka@gmail.com>
Date: Wed, 12 Mar 2025 16:25:50 +0100
Subject: [PATCH 2/2] Add comments to code

---
 calculate_largest_expensors.sql     |  8 ++++++++
 create_employees.sql                |  8 ++++++++
 create_expenses.sql                 | 12 ++++++++++++
 create_invoices.sql                 | 12 ++++++++++++
 find_manager_cycles.sql             | 19 ++++++++++++++++++-
 generate_insert_query.py            | 10 ++++++++++
 generate_supplier_payment_plans.sql | 16 +++++++++++++---
 7 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql
index 88b8906..a24bc30 100644
--- a/calculate_largest_expensors.sql
+++ b/calculate_largest_expensors.sql
@@ -1,3 +1,11 @@
+/*
+Approach:
+1. Aggregate data from EXPENSES table to calculate total expenses per employee
+2. Join EMPLOYEE table twice to display info about both employee and their manager
+3. Order output as per request
+
+*/
+
 SELECT 
      em.employee_id
     ,em.first_name || ' ' || em.last_name AS employee_name
diff --git a/create_employees.sql b/create_employees.sql
index e1c8016..45c04d3 100644
--- a/create_employees.sql
+++ b/create_employees.sql
@@ -1,3 +1,11 @@
+/*
+Approach:
+1. Create DDL for table
+2. Truncate target table to avoid duplication when script is run multiple times
+3. Insert data to target table as no transformations are needed
+
+*/
+
 CREATE TABLE IF NOT EXISTS EMPLOYEE (
 employee_id TINYINT,
 first_name  VARCHAR,
diff --git a/create_expenses.sql b/create_expenses.sql
index 930eaac..a8bc5ff 100644
--- a/create_expenses.sql
+++ b/create_expenses.sql
@@ -1,3 +1,15 @@
+/*
+Approach:
+1. Create DDL for table
+2. Create temp table that will contain data extracted from raw files without any transformations
+3. Insert data to temp table
+4. Truncate target table to avoid duplication when script is run multiple times
+5. Transform data from temp table so that it is ready for target table
+6. Load data to target table
+7. Drop the temp table once it is no longer needed
+
+*/
+
 CREATE TABLE IF NOT EXISTS EXPENSES (
 employee_id TINYINT,
 unit_price  DECIMAL(8, 2),
diff --git a/create_invoices.sql b/create_invoices.sql
index 626827e..b032c67 100644
--- a/create_invoices.sql
+++ b/create_invoices.sql
@@ -1,3 +1,15 @@
+/*
+Approach:
+1. Create DDL for tables
+2. Create temp table that will contain data extracted from raw files without any transformations
+3. Insert data to temp table
+4. Truncate target tables to avoid duplication when script is run multiple times
+5. Transform data from temp table so that it is ready for target tables
+6. Load data to target table
+7. Drop the temp table once it is no longer needed
+
+*/
+
 CREATE TABLE IF NOT EXISTS SUPPLIER (
 supplier_id TINYINT,
 NAME        VARCHAR
diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql
index 8f8e6f7..d23918c 100644
--- a/find_manager_cycles.sql
+++ b/find_manager_cycles.sql
@@ -1,10 +1,27 @@
+/*
+Approach:
+1. Define multiple joins to the EMPLOYEE table. Each subsequent join points to the manager of the employee from the previous step.
+2. Collect path into array
+3. Filter only those paths that are cycles
+4. Ensure path ends in original employee
+
+Theoretical explanation:
+If an employee is part of a managerial cycle, then after some number of steps the chain returns to its initial state, in this case to the original employee.
+After that the cycle would only repeat itself, so there is no need to continue the search; hence the additional join condition that halts the chain
+before it repeats itself.
+In the most extreme case it will be a cycle of all 9 employees and it will go back to the initial state at the 10th step.
+Thus, for all employees that are in a managerial cycle e0.employee_id is NULL.
+For employees that are not part of a managerial cycle it is impossible to go back to the original employee_id by following subsequent managers.
+Thus, for these employees e0.employee_id is not NULL.
+*/
+
 SELECT
      employee_id
     ,concat(array_except(manager_cycle, array[NULL]), array[employee_id]) manager_cycle
 FROM
    (
     SELECT 
-        e1.employee_id
+         e1.employee_id
         ,array[e1.employee_id, e2.employee_id, e3.employee_id, e4.employee_id, e5.employee_id, e6.employee_id, e7.employee_id, e8.employee_id, e9.employee_id] manager_cycle
 
     FROM EMPLOYEE e1
diff --git a/generate_insert_query.py b/generate_insert_query.py
index 8d28de7..38b3f8f 100644
--- a/generate_insert_query.py
+++ b/generate_insert_query.py
@@ -1,12 +1,22 @@
+# Helper script that extracts data from files and prepares SQL INSERT ... VALUES queries.
+
 import os              
 
 def check_column(msg, column_mapping, key, delimiter = ":"):
+    '''
+    Given a line of text (msg), check that it contains the delimiter
+    and that it starts with the expected label (column_mapping[key]).
+    Return the text that follows the label and its two-character separator (newlines removed), or False when the line does not match.
+    '''
     if delimiter in msg:
         if msg[0:len(column_mapping[key])] == column_mapping[key]:
             return msg[len(column_mapping[key])+2:].replace('\n', '')
     return False
 
 def gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm):
+    '''
+    Go through all files in the directory given by file_path (appended to the current working directory) and compile from each file one row of an INSERT query for the given table.
+    '''
     wd = os.path.abspath(os.getcwd())
     wd += file_path
 
diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql
index 4893729..cc3a78c 100644
--- a/generate_supplier_payment_plans.sql
+++ b/generate_supplier_payment_plans.sql
@@ -1,14 +1,24 @@
+/*
+Approach:
+1. For each invoice create array that will contain all dates at which payments will be done
+2. Explode this array so that there is a separate row for each payment x invoice
+3. Calculate the amount to be paid per payment date for this invoice
+4. Wrap up this part in a view for readability
+5. Aggregate all payments that are to be done on each payment date (in case there are multiple invoices to be paid out)
+6. Use window function to calculate all remaining payments to be done for a given supplier
+7. Add info about supplier
+
+*/
+
 CREATE OR REPLACE VIEW payment_dates AS
 SELECT
      supplier_id
     ,payment_ammount
-    ,due_date
     ,payment_date
 FROM (
         SELECT 
              supplier_id
             ,invoice_ammount/cardinality(payment_dates) AS payment_ammount
-            ,due_date
             ,payment_dates
         FROM
                 (
@@ -19,7 +29,7 @@ FROM (
                         ,sequence(last_day_of_month(date(now())), due_date, interval '1' month) AS payment_dates
                     FROM INVOICE
                 ) invoices_with_payment_days
-     ) AS invoices (supplier_id,payment_ammount,due_date,payment_dates)
+     ) AS invoices (supplier_id,payment_ammount,payment_dates)
 CROSS JOIN UNNEST(payment_dates) AS t(payment_date)
 ;