diff --git a/calculate_largest_expensors.sql b/calculate_largest_expensors.sql index e69de29..a24bc30 100644 --- a/calculate_largest_expensors.sql +++ b/calculate_largest_expensors.sql @@ -0,0 +1,25 @@ +/* +Approach: +1. Aggregate data from EXPENSES table to calcualte total expenses per employee +2. Join EMPLOYEE table twice to display info about both employee and their manager +3. Order output as per request + +*/ + +SELECT + em.employee_id + ,em.first_name || ' ' || em.last_name AS employee_name + ,m.employee_id AS manager_id + ,m.first_name || ' ' || m.last_name AS manager_name + ,ex.total_expensed_amount +FROM ( + SELECT + employee_id + ,SUM(unit_price * quantity) AS total_expensed_amount + FROM EXPENSES + GROUP BY employee_id + ) ex +LEFT JOIN EMPLOYEE em ON em.employee_id = ex.employee_id +LEFT JOIN EMPLOYEE m ON em.manager_id = m.employee_id +ORDER BY total_expensed_amount DESC +; \ No newline at end of file diff --git a/create_employees.sql b/create_employees.sql index e69de29..45c04d3 100644 --- a/create_employees.sql +++ b/create_employees.sql @@ -0,0 +1,32 @@ +/* +Approach: +1. Create DDL for table +2. Truncate target table to avoid duplication when script is run multiple times +3. Insert data to target table as no transformations are needed + +*/ + +CREATE TABLE IF NOT EXISTS EMPLOYEE ( +employee_id TINYINT, +first_name VARCHAR, +last_name VARCHAR, +job_title VARCHAR, +manager_id TINYINT +) +; + +TRUNCATE TABLE EMPLOYEE +; + +INSERT INTO EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id) +VALUES +(1,'Ian','James','CEO',4), +(2,'Umberto','Torrielli','CSO',1), +(3,'Alex','Jacobson','MD EMEA',2), +(4,'Darren','Poynton','CFO',2), +(5,'Tim','Beard','MD APAC',2), +(6,'Gemma','Dodd','COS',1), +(7,'Lisa','Platten','CHR',6), +(8,'Stefano','Camisaca','GM Activation',2), +(9,'Andrea','Ghibaudi','MD NAM',2) +; \ No newline at end of file diff --git a/create_expenses.sql b/create_expenses.sql index e69de29..a8bc5ff 100644 --- a/create_expenses.sql +++ b/create_expenses.sql @@ -0,0 +1,46 @@ +/* +Approach: +1. Create DDL for table +2. Create temp table that will contain data extracted from raw files without any transformations +3. Insert data to temp table +4. Truncate target table to avoid duplication when script is run multiple times +5. Transform data from temp table so that it is ready for target table +6. Load data to target table +7. Remove not needed temp table + +*/ + +CREATE TABLE IF NOT EXISTS EXPENSES ( +employee_id TINYINT, +unit_price DECIMAL(8, 2), +quantity TINYINT +); + +CREATE TABLE IF NOT EXISTS EXPENSES_tmp ( +employee_name VARCHAR, +unit_price DECIMAL(8, 2), +quantity TINYINT +); + +INSERT INTO EXPENSES_tmp (employee_name, unit_price, quantity) +VALUES +('Alex Jacobson',6.50,14), +('Alex Jacobson',11.00,20), +('Alex Jacobson',22.00,18), +('Alex Jacobson',13.00,75), +('Andrea Ghibaudi',300,1), +('Darren Poynton',40.00,9), +('Umberto Torrielli',17.50,4); + +TRUNCATE TABLE EXPENSES; + +INSERT INTO EXPENSES +SELECT + em.employee_id + ,ex.unit_price + ,ex.quantity +FROM EXPENSES_tmp ex +INNER JOIN EMPLOYEE em ON ex.employee_name = em.first_name || ' ' || em.last_name +; + +DROP TABLE EXPENSES_tmp; \ No newline at end of file diff --git a/create_invoices.sql b/create_invoices.sql index e69de29..b032c67 100644 --- a/create_invoices.sql +++ b/create_invoices.sql @@ -0,0 +1,65 @@ +/* +Approach: +1. Create DDL for tables +2. Create temp table that will contain data extracted from raw files without any transformations +3. Insert data to temp table +4. Truncate target tables to avoid duplication when script is run multiple times +5. Transform data from temp table so that it is ready for target tables +6. Load data to target table +7. Remove not needed temp table + +*/ + +CREATE TABLE IF NOT EXISTS SUPPLIER ( +supplier_id TINYINT, +NAME VARCHAR +); + +CREATE TABLE IF NOT EXISTS INVOICE ( +supplier_id TINYINT, +invoice_ammount DECIMAL(8, 2), +due_date date +); + +CREATE TABLE IF NOT EXISTS INVOICE_tmp ( +supplier VARCHAR, +invoice_ammount DECIMAL(8, 2), +months_due TINYINT +); + +INSERT INTO INVOICE_tmp (supplier,invoice_ammount,months_due) +VALUES +('Party Animals',6000,3), +('Catering Plus',2000,2), +('Catering Plus',1500,3), +('Dave''s Discos',500,1), +('Entertainment tonight',6000,3), +('Ice Ice Baby',4000,6); + +TRUNCATE TABLE INVOICE; +TRUNCATE TABLE SUPPLIER; + +INSERT INTO SUPPLIER +SELECT + ROW_NUMBER() OVER (ORDER BY name) AS supplier_id + ,name +FROM + (SELECT + supplier AS name + FROM INVOICE_tmp + GROUP BY supplier + ) a +; + + +INSERT INTO INVOICE +SELECT + s.supplier_id + ,tmp.invoice_ammount + ,last_day_of_month(date_add('month', tmp.months_due-1, now())) AS due_date +FROM INVOICE_tmp tmp +LEFT JOIN SUPPLIER s ON s.name = tmp.supplier +; + +DROP TABLE INVOICE_tmp; + diff --git a/find_manager_cycles.sql b/find_manager_cycles.sql index e69de29..d23918c 100644 --- a/find_manager_cycles.sql +++ b/find_manager_cycles.sql @@ -0,0 +1,41 @@ +/* +Approach: +1. Define multiple joins to EMPLOYEE table. Each subsequent join point to manager of employee in previous step. +2. Collect path into array +3. Filter only those paths that are cycles +4. Ensure path ends in original employee + +Theoretical explaination: +If employee is part of managerial cycle then after some number of steps cycle would go back to initial state, in this case to original employee. +After that the cycle will only repeat itself so there is no need to continue the search, hence the additional condition halting the process +before chain reverts itself. +In the most extreme case it will be a cycle of all 9 employees and it will go back to initial state at 10th step. +Thus, for all employees that are in managerial cycle e0.employee_id is NULL. +For employees that are not part of managerial cycle it's impossible to go back to original employee_id by following subsequent managers +Thus, for these employees e0.employee_id is not NULL. +*/ + +SELECT + employee_id + ,concat(array_except(manager_cycle, array[NULL]), array[employee_id]) manager_cycle +FROM + ( + SELECT + e1.employee_id + ,array[e1.employee_id, e2.employee_id, e3.employee_id, e4.employee_id, e5.employee_id, e6.employee_id, e7.employee_id, e8.employee_id, e9.employee_id] manager_cycle + + FROM EMPLOYEE e1 + LEFT JOIN EMPLOYEE e2 on e1.manager_id = e2.employee_id AND e1.employee_id <> e2.employee_id + LEFT JOIN EMPLOYEE e3 on e2.manager_id = e3.employee_id AND e1.employee_id <> e3.employee_id + LEFT JOIN EMPLOYEE e4 on e3.manager_id = e4.employee_id AND e1.employee_id <> e4.employee_id + LEFT JOIN EMPLOYEE e5 on e4.manager_id = e5.employee_id AND e1.employee_id <> e5.employee_id + LEFT JOIN EMPLOYEE e6 on e5.manager_id = e6.employee_id AND e1.employee_id <> e6.employee_id + LEFT JOIN EMPLOYEE e7 on e6.manager_id = e7.employee_id AND e1.employee_id <> e7.employee_id + LEFT JOIN EMPLOYEE e8 on e7.manager_id = e8.employee_id AND e1.employee_id <> e8.employee_id + LEFT JOIN EMPLOYEE e9 on e8.manager_id = e9.employee_id AND e1.employee_id <> e9.employee_id + LEFT JOIN EMPLOYEE e0 on e8.manager_id = e0.employee_id AND e1.employee_id <> e0.employee_id + WHERE e0.employee_id is NULL + ) +; + + diff --git a/generate_insert_query.py b/generate_insert_query.py new file mode 100644 index 0000000..38b3f8f --- /dev/null +++ b/generate_insert_query.py @@ -0,0 +1,85 @@ +#Aid used to extract data from files and to prepare SQL insert values queries. + +import os + +def check_column(msg, column_mapping, key, delimiter = ":"): + ''' + Given a string (msg) verify that it contains data (key - value separated by delimiter) + and if it starts with desired value (column_mapping[key]) + Output value that is stored in that string + ''' + if delimiter in msg: + if msg[0:len(column_mapping[key])] == column_mapping[key]: + return msg[len(column_mapping[key])+2:].replace('\n', '') + return False + +def gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm): + ''' + Function that goes through all files in location given by relative path (file_path) and compiles from each file row to be inserted into desired table + ''' + wd = os.path.abspath(os.getcwd()) + wd += file_path + + sql_query = f"""INSERT INTO {table_name} ({','.join(column_mapping.keys())}) + VALUES""" + vals = [] + for file in os.listdir(wd): + f = open(f"{wd}/{file}", "r") + val = {} + for line in f: + for k in column_mapping.keys(): + if check_column(line, column_mapping, k): + val[k] = check_column(line, column_mapping, k) + vals.append(val) + f.close() + print(vals) + for val in vals: + sql_query += f""" +(""" + for k in column_mapping.keys(): + column_value = val[k] + + if k in column_with_qm: + column_value = column_value.replace("'", "''") + sql_query += "'" + + + if k in column_trim: + column_value = val[k].split(" ")[0] + sql_query += column_value + + if k in column_with_qm: + sql_query += "'" + + sql_query += "," + + sql_query = sql_query[:-1] + sql_query += f")," + sql_query = sql_query[:-1] + ';' + return sql_query + + + +file_path = "/finance/receipts_from_last_night" +table_name = "EXPENSES_tmp" +column_mapping = { "employee_name": "Employee" + ,"unit_price": "Unit Price" + ,"quantity": "Quantity" + } +column_trim = [] +column_with_qm = ["employee_name"] + +print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm)) + + + +file_path = "/finance/invoices_due" +table_name = "INVOICE_tmp" +column_mapping = { "supplier": "Company Name" + ,"invoice_ammount": "Invoice Amount" + ,"months_due": "Due Date" + } +column_trim = ["months_due"] +column_with_qm = ["supplier"] + +print(gen_sql_query(file_path, table_name, column_mapping, column_trim, column_with_qm)) diff --git a/generate_supplier_payment_plans.sql b/generate_supplier_payment_plans.sql index e69de29..cc3a78c 100644 --- a/generate_supplier_payment_plans.sql +++ b/generate_supplier_payment_plans.sql @@ -0,0 +1,53 @@ +/* +Approach: +1. For each invoice create array that will contain all dates at which payments will be done +2. Explode this array so that there is a seperate row for each payment x invoice +3. Calcualte the amount that should be paid out per this invoice +4. Wrap up this part in view for readibility +5. Aggregate all payments that are to be done on each payment date (in case there are multiple invoices to be paid out) +6. Use window function to calculate all remaining payments to be done for a given supplier +7. Add info about supplier + +*/ + +CREATE OR REPLACE VIEW payment_dates AS +SELECT + supplier_id + ,payment_ammount + ,payment_date +FROM ( + SELECT + supplier_id + ,invoice_ammount/cardinality(payment_dates) AS payment_ammount + ,payment_dates + FROM + ( + SELECT + supplier_id + ,due_date + ,invoice_ammount + ,sequence(last_day_of_month(date(now())), due_date, interval '1' month) AS payment_dates + FROM INVOICE + ) invoices_with_payment_days + ) AS invoices (supplier_id,payment_ammount,payment_dates) +CROSS JOIN UNNEST(payment_dates) AS t(payment_date) +; + +SELECT + p.supplier_id + ,s.name AS supplier_name + ,p.payment_ammount + ,SUM(p.payment_ammount) OVER (PARTITION BY p.supplier_id ORDER BY p.payment_date DESC) - p.payment_ammount AS balance_outstanding + ,p.payment_date + +FROM ( + SELECT + supplier_id + ,payment_date + ,sum(payment_ammount) AS payment_ammount + FROM payment_dates + GROUP BY supplier_id, payment_date + ) p +LEFT JOIN SUPPLIER s ON p.supplier_id = s.supplier_id +ORDER BY p.supplier_id, p.payment_date +; \ No newline at end of file