From 250fe6c1befbe0e947839aa8f13f4791d86a2326 Mon Sep 17 00:00:00 2001 From: jakubmisterka Date: Wed, 12 Mar 2025 15:25:59 +0100 Subject: [PATCH 1/2] solution --- calculate_largest_expensors.sql | 17 +++++++ create_employees.sql | 24 +++++++++ create_expenses.sql | 34 +++++++++++++ create_invoices.sql | 53 ++++++++++++++++++++ find_manager_cycles.sql | 24 +++++++++ generate_insert_query.py | 75 +++++++++++++++++++++++++++++ generate_supplier_payment_plans.sql | 43 +++++++++++++++++ 7 files changed, 270 insertions(+) create mode 100644 generate_insert_query.py diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql index e69de29..88b8906 100644 --- a/calculate_largest_expensors.sql +++ b/calculate_largest_expensors.sql @@ -0,0 +1,17 @@ +SELECT + em.employee_id + ,em.first_name || ' ' || em.last_name AS employee_name + ,m.employee_id AS manager_id + ,m.first_name || ' ' || m.last_name AS manager_name + ,ex.total_expensed_amount +FROM ( + SELECT + employee_id + ,SUM(unit_price * quantity) AS total_expensed_amount + FROM EXPENSES + GROUP BY employee_id + ) ex +LEFT JOIN EMPLOYEE em ON em.employee_id = ex.employee_id +LEFT JOIN EMPLOYEE m ON em.manager_id = m.employee_id +ORDER BY total_expensed_amount DESC +; \ No newline at end of file diff --git a/create_employees.sql b/create_employees.sql index e69de29..e1c8016 100644 --- a/create_employees.sql +++ b/create_employees.sql @@ -0,0 +1,24 @@ +CREATE TABLE IF NOT EXISTS EMPLOYEE ( +employee_id TINYINT, +first_name VARCHAR, +last_name VARCHAR, +job_title VARCHAR, +manager_id TINYINT +) +; + +TRUNCATE TABLE EMPLOYEE +; + +INSERT INTO EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id) +VALUES +(1,'Ian','James','CEO',4), +(2,'Umberto','Torrielli','CSO',1), +(3,'Alex','Jacobson','MD EMEA',2), +(4,'Darren','Poynton','CFO',2), +(5,'Tim','Beard','MD APAC',2), +(6,'Gemma','Dodd','COS',1), +(7,'Lisa','Platten','CHR',6), +(8,'Stefano','Camisaca','GM Activation',2), +(9,'Andrea','Ghibaudi','MD NAM',2) +; \ No newline at end of file diff --git a/create_expenses.sql b/create_expenses.sql index e69de29..930eaac 100644 --- a/create_expenses.sql +++ b/create_expenses.sql @@ -0,0 +1,34 @@ +CREATE TABLE IF NOT EXISTS EXPENSES ( +employee_id TINYINT, +unit_price DECIMAL(8, 2), +quantity TINYINT +); + +CREATE TABLE IF NOT EXISTS EXPENSES_tmp ( +employee_name VARCHAR, +unit_price DECIMAL(8, 2), +quantity TINYINT +); + +INSERT INTO EXPENSES_tmp (employee_name, unit_price, quantity) +VALUES +('Alex Jacobson',6.50,14), +('Alex Jacobson',11.00,20), +('Alex Jacobson',22.00,18), +('Alex Jacobson',13.00,75), +('Andrea Ghibaudi',300,1), +('Darren Poynton',40.00,9), +('Umberto Torrielli',17.50,4); + +TRUNCATE TABLE EXPENSES; + +INSERT INTO EXPENSES +SELECT + em.employee_id + ,ex.unit_price + ,ex.quantity +FROM EXPENSES_tmp ex +INNER JOIN EMPLOYEE em ON ex.employee_name = em.first_name || ' ' || em.last_name +; + +DROP TABLE EXPENSES_tmp; \ No newline at end of file diff --git a/create_invoices.sql b/create_invoices.sql index e69de29..626827e 100644 --- a/create_invoices.sql +++ b/create_invoices.sql @@ -0,0 +1,53 @@ +CREATE TABLE IF NOT EXISTS SUPPLIER ( +supplier_id TINYINT, +NAME VARCHAR +); + +CREATE TABLE IF NOT EXISTS INVOICE ( +supplier_id TINYINT, +invoice_ammount DECIMAL(8, 2), +due_date date +); + +CREATE TABLE IF NOT EXISTS INVOICE_tmp ( +supplier VARCHAR, +invoice_ammount DECIMAL(8, 2), +months_due TINYINT +); + +INSERT INTO INVOICE_tmp (supplier,invoice_ammount,months_due) +VALUES +('Party Animals',6000,3), +('Catering Plus',2000,2), +('Catering Plus',1500,3), +('Dave''s Discos',500,1), +('Entertainment tonight',6000,3), +('Ice Ice Baby',4000,6); + +TRUNCATE TABLE INVOICE; +TRUNCATE TABLE SUPPLIER; + +INSERT INTO SUPPLIER +SELECT + ROW_NUMBER() OVER (ORDER BY name) AS supplier_id + ,name +FROM + (SELECT + supplier AS name + FROM INVOICE_tmp + GROUP BY supplier + ) a +; + + +INSERT INTO INVOICE +SELECT + s.supplier_id + ,tmp.invoice_ammount + ,last_day_of_month(date_add('month', tmp.months_due-1, now())) AS due_date +FROM INVOICE_tmp tmp +LEFT JOIN SUPPLIER s ON s.name = tmp.supplier +; + +DROP TABLE INVOICE_tmp; + diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql index e69de29..8f8e6f7 100644 --- a/find_manager_cycles.sql +++ b/find_manager_cycles.sql @@ -0,0 +1,24 @@ +SELECT + employee_id + ,concat(array_except(manager_cycle, array[NULL]), array[employee_id]) manager_cycle +FROM + ( + SELECT + e1.employee_id + ,array[e1.employee_id, e2.employee_id, e3.employee_id, e4.employee_id, e5.employee_id, e6.employee_id, e7.employee_id, e8.employee_id, e9.employee_id] manager_cycle + + FROM EMPLOYEE e1 + LEFT JOIN EMPLOYEE e2 on e1.manager_id = e2.employee_id AND e1.employee_id <> e2.employee_id + LEFT JOIN EMPLOYEE e3 on e2.manager_id = e3.employee_id AND e1.employee_id <> e3.employee_id + LEFT JOIN EMPLOYEE e4 on e3.manager_id = e4.employee_id AND e1.employee_id <> e4.employee_id + LEFT JOIN EMPLOYEE e5 on e4.manager_id = e5.employee_id AND e1.employee_id <> e5.employee_id + LEFT JOIN EMPLOYEE e6 on e5.manager_id = e6.employee_id AND e1.employee_id <> e6.employee_id + LEFT JOIN EMPLOYEE e7 on e6.manager_id = e7.employee_id AND e1.employee_id <> e7.employee_id + LEFT JOIN EMPLOYEE e8 on e7.manager_id = e8.employee_id AND e1.employee_id <> e8.employee_id + LEFT JOIN EMPLOYEE e9 on e8.manager_id = e9.employee_id AND e1.employee_id <> e9.employee_id + LEFT JOIN EMPLOYEE e0 on e8.manager_id = e0.employee_id AND e1.employee_id <> e0.employee_id + WHERE e0.employee_id is NULL + ) +; + + diff --git a/generate_insert_query.py b/generate_insert_query.py new file mode 100644 index 0000000..8d28de7 --- /dev/null +++ b/generate_insert_query.py @@ -0,0 +1,75 @@ +import os + +def check_column(msg, column_mapping, key, delimiter = ":"): + if delimiter in msg: + if msg[0:len(column_mapping[key])] == column_mapping[key]: + return msg[len(column_mapping[key])+2:].replace('\n', '') + return False + +def gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm): + wd = os.path.abspath(os.getcwd()) + wd += file_path + + sql_query = f"""INSERT INTO {table_name} ({','.join(column_mapping.keys())}) + VALUES""" + vals = [] + for file in os.listdir(wd): + f = open(f"{wd}/{file}", "r") + val = {} + for line in f: + for k in column_mapping.keys(): + if check_column(line, column_mapping, k): + val[k] = check_column(line, column_mapping, k) + vals.append(val) + f.close() + print(vals) + for val in vals: + sql_query += f""" +(""" + for k in column_mapping.keys(): + column_value = val[k] + + if k in column_with_qm: + column_value = column_value.replace("'", "''") + sql_query += "'" + + + if k in column_trim: + column_value = val[k].split(" ")[0] + sql_query += column_value + + if k in column_with_qm: + sql_query += "'" + + sql_query += "," + + sql_query = sql_query[:-1] + sql_query += f")," + sql_query = sql_query[:-1] + ';' + return sql_query + + + +file_path = "/finance/receipts_from_last_night" +table_name = "EXPENSES_tmp" +column_mapping = { "employee_name": "Employee" + ,"unit_price": "Unit Price" + ,"quantity": "Quantity" + } +column_trim = [] +column_with_qm = ["employee_name"] + +print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm)) + + + +file_path = "/finance/invoices_due" +table_name = "INVOICE_tmp" +column_mapping = { "supplier": "Company Name" + ,"invoice_ammount": "Invoice Amount" + ,"months_due": "Due Date" + } +column_trim = ["months_due"] +column_with_qm = ["supplier"] + +print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm)) diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql index e69de29..4893729 100644 --- a/generate_supplier_payment_plans.sql +++ b/generate_supplier_payment_plans.sql @@ -0,0 +1,43 @@ +CREATE OR REPLACE VIEW payment_dates AS +SELECT + supplier_id + ,payment_ammount + ,due_date + ,payment_date +FROM ( + SELECT + supplier_id + ,invoice_ammount/cardinality(payment_dates) AS payment_ammount + ,due_date + ,payment_dates + FROM + ( + SELECT + supplier_id + ,due_date + ,invoice_ammount + ,sequence(last_day_of_month(date(now())), due_date, interval '1' month) AS payment_dates + FROM INVOICE + ) invoices_with_payment_days + ) AS invoices (supplier_id,payment_ammount,due_date,payment_dates) +CROSS JOIN UNNEST(payment_dates) AS t(payment_date) +; + +SELECT + p.supplier_id + ,s.name AS supplier_name + ,p.payment_ammount + ,SUM(p.payment_ammount) OVER (PARTITION BY p.supplier_id ORDER BY p.payment_date DESC) - p.payment_ammount AS balance_outstanding + ,p.payment_date + +FROM ( + SELECT + supplier_id + ,payment_date + ,sum(payment_ammount) AS payment_ammount + FROM payment_dates + GROUP BY supplier_id, payment_date + ) p +LEFT JOIN SUPPLIER s ON p.supplier_id = s.supplier_id +ORDER BY p.supplier_id, p.payment_date +; \ No newline at end of file From b09a4de83e3c46bcf1b81a891489ec421ee6c0d8 Mon Sep 17 00:00:00 2001 From: jakubmisterka Date: Wed, 12 Mar 2025 16:25:50 +0100 Subject: [PATCH 2/2] Add comments to code --- calculate_largest_expensors.sql | 8 ++++++++ create_employees.sql | 8 ++++++++ create_expenses.sql | 12 ++++++++++++ create_invoices.sql | 12 ++++++++++++ find_manager_cycles.sql | 19 ++++++++++++++++++- generate_insert_query.py | 10 ++++++++++ generate_supplier_payment_plans.sql | 16 +++++++++++++--- 7 files changed, 81 insertions(+), 4 deletions(-) diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql index 88b8906..a24bc30 100644 --- a/calculate_largest_expensors.sql +++ b/calculate_largest_expensors.sql @@ -1,3 +1,11 @@ +/* +Approach: +1. Aggregate data from EXPENSES table to calcualte total expenses per employee +2. Join EMPLOYEE table twice to display info about both employee and their manager +3. Order output as per request + +*/ + SELECT em.employee_id ,em.first_name || ' ' || em.last_name AS employee_name diff --git a/create_employees.sql b/create_employees.sql index e1c8016..45c04d3 100644 --- a/create_employees.sql +++ b/create_employees.sql @@ -1,3 +1,11 @@ +/* +Approach: +1. Create DDL for table +2. Truncate target table to avoid duplication when script is run multiple times +3. Insert data to target table as no transformations are needed + +*/ + CREATE TABLE IF NOT EXISTS EMPLOYEE ( employee_id TINYINT, first_name VARCHAR, diff --git a/create_expenses.sql b/create_expenses.sql index 930eaac..a8bc5ff 100644 --- a/create_expenses.sql +++ b/create_expenses.sql @@ -1,3 +1,15 @@ +/* +Approach: +1. Create DDL for table +2. Create temp table that will contain data extracted from raw files without any transformations +3. Insert data to temp table +4. Truncate target table to avoid duplication when script is run multiple times +5. Transform data from temp table so that it is ready for target table +6. Load data to target table +7. Remove not needed temp table + +*/ + CREATE TABLE IF NOT EXISTS EXPENSES ( employee_id TINYINT, unit_price DECIMAL(8, 2), diff --git a/create_invoices.sql b/create_invoices.sql index 626827e..b032c67 100644 --- a/create_invoices.sql +++ b/create_invoices.sql @@ -1,3 +1,15 @@ +/* +Approach: +1. Create DDL for tables +2. Create temp table that will contain data extracted from raw files without any transformations +3. Insert data to temp table +4. Truncate target tables to avoid duplication when script is run multiple times +5. Transform data from temp table so that it is ready for target tables +6. Load data to target table +7. Remove not needed temp table + +*/ + CREATE TABLE IF NOT EXISTS SUPPLIER ( supplier_id TINYINT, NAME VARCHAR diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql index 8f8e6f7..d23918c 100644 --- a/find_manager_cycles.sql +++ b/find_manager_cycles.sql @@ -1,10 +1,27 @@ +/* +Approach: +1. Define multiple joins to EMPLOYEE table. Each subsequent join point to manager of employee in previous step. +2. Collect path into array +3. Filter only those paths that are cycles +4. Ensure path ends in original employee + +Theoretical explaination: +If employee is part of managerial cycle then after some number of steps cycle would go back to initial state, in this case to original employee. +After that the cycle will only repeat itself so there is no need to continue the search, hence the additional condition halting the process +before chain reverts itself. +In the most extreme case it will be a cycle of all 9 employees and it will go back to initial state at 10th step. +Thus, for all employees that are in managerial cycle e0.employee_id is NULL. +For employees that are not part of managerial cycle it's impossible to go back to original employee_id by following subsequent managers +Thus, for these employees e0.employee_id is not NULL. +*/ + SELECT employee_id ,concat(array_except(manager_cycle, array[NULL]), array[employee_id]) manager_cycle FROM ( SELECT - e1.employee_id + e1.employee_id ,array[e1.employee_id, e2.employee_id, e3.employee_id, e4.employee_id, e5.employee_id, e6.employee_id, e7.employee_id, e8.employee_id, e9.employee_id] manager_cycle FROM EMPLOYEE e1 diff --git a/generate_insert_query.py b/generate_insert_query.py index 8d28de7..38b3f8f 100644 --- a/generate_insert_query.py +++ b/generate_insert_query.py @@ -1,12 +1,22 @@ +#Aid used to extract data from files and to prepare SQL insert values queries. + import os def check_column(msg, column_mapping, key, delimiter = ":"): + ''' + Given a string (msg) verify that it contains data (key - value separated by delimiter) + and if it starts with desired value (column_mapping[key]) + Output value that is stored in that string + ''' if delimiter in msg: if msg[0:len(column_mapping[key])] == column_mapping[key]: return msg[len(column_mapping[key])+2:].replace('\n', '') return False def gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm): + ''' + Function that goes through all files in location given by relative path (file_path) and compiles from each file row to be inserted into desired table + ''' wd = os.path.abspath(os.getcwd()) wd += file_path diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql index 4893729..cc3a78c 100644 --- a/generate_supplier_payment_plans.sql +++ b/generate_supplier_payment_plans.sql @@ -1,14 +1,24 @@ +/* +Approach: +1. For each invoice create array that will contain all dates at which payments will be done +2. Explode this array so that there is a seperate row for each payment x invoice +3. Calcualte the amount that should be paid out per this invoice +4. Wrap up this part in view for readibility +5. Aggregate all payments that are to be done on each payment date (in case there are multiple invoices to be paid out) +6. Use window function to calculate all remaining payments to be done for a given supplier +7. Add info about supplier + +*/ + CREATE OR REPLACE VIEW payment_dates AS SELECT supplier_id ,payment_ammount - ,due_date ,payment_date FROM ( SELECT supplier_id ,invoice_ammount/cardinality(payment_dates) AS payment_ammount - ,due_date ,payment_dates FROM ( @@ -19,7 +29,7 @@ FROM ( ,sequence(last_day_of_month(date(now())), due_date, interval '1' month) AS payment_dates FROM INVOICE ) invoices_with_payment_days - ) AS invoices (supplier_id,payment_ammount,due_date,payment_dates) + ) AS invoices (supplier_id,payment_ammount,payment_dates) CROSS JOIN UNNEST(payment_dates) AS t(payment_date) ;