Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions calculate_largest_expensors.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
USE memory.default;

-- largest_expensors: every employee whose summed expenses
-- (unit_price * quantity over all of their EXPENSE rows) exceed 1000,
-- together with the full name of their manager.
CREATE OR REPLACE VIEW largest_expensors AS (
    SELECT
        staff.employee_id,
        staff.first_name || ' ' || staff.last_name AS employee_name,
        staff.manager_id,
        boss.first_name || ' ' || boss.last_name AS manager_name,
        sum(cost.unit_price * cost.quantity) AS total_expensed_amount
    FROM employee AS staff
    -- Inner join: employees without any expense row cannot qualify.
    INNER JOIN expense AS cost
        ON cost.employee_id = staff.employee_id
    -- Left join: keep employees even when no manager row matches manager_id.
    LEFT JOIN employee AS boss
        ON boss.employee_id = staff.manager_id
    GROUP BY
        staff.employee_id,
        staff.first_name || ' ' || staff.last_name,
        staff.manager_id,
        boss.first_name || ' ' || boss.last_name
    HAVING sum(cost.unit_price * cost.quantity) > 1000
);

-- Report the view, biggest spender first.
SELECT
    employee_id,
    employee_name,
    manager_id,
    manager_name,
    total_expensed_amount
FROM largest_expensors
ORDER BY total_expensed_amount DESC;
14 changes: 14 additions & 0 deletions create_employees.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
USE memory.default;

-- EMPLOYEE table definition.
-- One row per employee; manager_id holds the employee_id of that employee's manager.
CREATE TABLE EMPLOYEE (
    employee_id TINYINT NOT NULL,
    first_name VARCHAR,
    last_name VARCHAR,
    job_title VARCHAR,
    manager_id TINYINT
);

-- Populate EMPLOYEE from the brz_employees staging table (loaded from hr/employee_index.csv).
-- Columns are listed explicitly on both sides so the load does not depend on
-- the physical column order of the staging table.
INSERT INTO EMPLOYEE (employee_id, first_name, last_name, job_title, manager_id)
SELECT
    employee_id
    , first_name
    , last_name
    , job_title
    , manager_id
FROM brz_employees
;
20 changes: 20 additions & 0 deletions create_expenses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
USE memory.default;

-- EXPENSE table definition.
-- One row per receipt line: who expensed it, the price per unit and the number of units.
CREATE TABLE EXPENSE (
    employee_id TINYINT,
    unit_price DECIMAL(8, 2),
    quantity TINYINT
);

-- Populate EXPENSE table with data from brz_expenses and employee IDs from EMPLOYEE dimension table.
-- Receipts carry only the employee's full name, so the employee is resolved by a
-- case-insensitive match on "first_name last_name".
-- NOTE: because this is an INNER JOIN, receipts whose name matches no employee are dropped silently.
INSERT INTO EXPENSE (employee_id, unit_price, quantity)
SELECT
    emp.employee_id AS employee_id
    , exp_t.unit_price AS unit_price
    , exp_t.quantity AS quantity
FROM brz_expenses AS exp_t
INNER JOIN EMPLOYEE AS emp
    ON lower(exp_t.employee_full_name) = lower(concat(emp.first_name, ' ', emp.last_name))
;

33 changes: 33 additions & 0 deletions create_invoices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
USE memory.default;

-- INVOICE table definition.
-- One row per invoice; supplier_id refers to SUPPLIER.supplier_id.
CREATE TABLE INVOICE (
    supplier_id TINYINT,
    invoice_amount DECIMAL(8, 2),
    due_date DATE
);

-- SUPPLIER table definition.
CREATE TABLE SUPPLIER (
    supplier_id TINYINT,
    name VARCHAR
);

-- Populate SUPPLIER table with one row per distinct company name found in brz_invoices.
-- supplier_id is a generated key: the position of the company when ordered by name.
-- row_number() yields BIGINT, so it is cast explicitly to the TINYINT column type.
INSERT INTO SUPPLIER (supplier_id, name)
SELECT
    cast(row_number() OVER (ORDER BY company_name) AS TINYINT) AS supplier_id
    , company_name AS name
FROM (SELECT DISTINCT company_name FROM brz_invoices)
;

-- Populate INVOICE table with data from brz_invoices and new supplier_id from SUPPLIER.
-- due_date is the last day of the month that lies due_date_in_months months after
-- the moment this script runs.
INSERT INTO INVOICE (supplier_id, invoice_amount, due_date)
SELECT
    sup.supplier_id AS supplier_id
    , inv_t.invoice_amount AS invoice_amount
    , last_day_of_month(date_add('month', inv_t.due_date_in_months, now())) AS due_date
FROM brz_invoices AS inv_t
INNER JOIN SUPPLIER AS sup
    ON inv_t.company_name = sup.name
;
48 changes: 48 additions & 0 deletions data_loading/employee_data_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pandas as pd
import os

def _sql_string(value):
    """Render a CSV cell as a single-quoted SQL string literal, or NULL when the cell is empty."""
    if pd.isna(value):
        return "NULL"
    # Double embedded single quotes so a name such as O'Brien stays inside the literal.
    return "'" + str(value).replace("'", "''") + "'"


def _sql_integer(value):
    """Render a CSV cell as an integer SQL literal, or NULL when the cell is empty."""
    if pd.isna(value):
        return "NULL"
    # pandas reads an integer column that contains blanks as floats (4 -> 4.0);
    # convert back so the statement carries a plain integer.
    return str(int(value))


def generate_sql_from_csv():
    """
    Reads employee data from CSV and generates SQL INSERT statements
    to save in a .sql file.

    The CSV is read from hr/employee_index.csv and the statement is appended to
    data_loading/output_queries/brz_employees.sql, both relative to the current
    working directory.

    Returns:
        True when the statement was written, False when the CSV is missing or
        any error occurred while processing it (the error is printed).
    """
    # Path of the CSV file, relative to the current working directory
    file_path = os.path.join('hr', 'employee_index.csv')
    # Built with os.path.join: the previous backslash literal contained "\b",
    # which Python interprets as a backspace character.
    output_path = os.path.join('data_loading', 'output_queries', 'brz_employees.sql')

    # Check if the file exists
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        print(f"Current working directory: {os.getcwd()}")
        return False

    try:
        # Read the CSV file into a pandas DataFrame
        df = pd.read_csv(file_path)

        # Build one "(...)" value tuple per employee row
        value_rows = []
        for _, row in df.iterrows():
            fields = [
                _sql_integer(row['employee_id']),
                _sql_string(row['first_name']),
                _sql_string(row['last_name']),
                _sql_string(row['job_title']),
                _sql_integer(row['manager_id']),
            ]
            value_rows.append("(" + ", ".join(fields) + ")")

        # Combine all value tuples into a single multi-row INSERT
        all_values = ",\n ".join(value_rows)
        sql_insert = (
            "-- Insert data from CSV\n"
            "INSERT INTO brz_employees (employee_id, first_name, last_name, job_title, manager_id) VALUES\n"
            f"{all_values};\n"
        )

        # Create or append to the SQL file
        # NOTE(review): append mode means every run adds another copy of the
        # INSERT statement to the file — confirm this is intended.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, 'a') as f:
            f.write(sql_insert)

        print(f"Successfully generated SQL INSERT statements for {len(df)} employee records")
        print(f"Appended to {output_path}")
        return True

    except Exception as e:
        # Best-effort script: report the problem and signal failure to the caller.
        print(f"Error processing the data: {e}")
        return False


if __name__ == "__main__":
    generate_sql_from_csv()
63 changes: 63 additions & 0 deletions data_loading/expenses_data_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
import re
import glob

def parse_expense_file(file_path):
    """Parse an expense receipt text file and extract relevant information.

    Args:
        file_path: Path of the receipt ``.txt`` file to read.

    Returns:
        dict with keys ``employee_name`` (str, ``""`` when no "Employee:" line
        is found), ``unit_price`` (float, ``0.0`` when no "Unit Price:" line is
        found) and ``quantity`` (int, ``0`` when no "Quantity:" line is found).
    """
    with open(file_path, 'r') as file:
        content = file.read()

    # Extract employee name (rest of the "Employee:" line, without padding)
    employee_match = re.search(r'Employee: (.+)', content)
    employee_name = employee_match.group(1).strip() if employee_match else ""

    # Extract unit price; the fractional part is optional so that whole-number
    # prices such as "40" are parsed instead of silently becoming 0.0
    price_match = re.search(r'Unit Price: (\d+(?:\.\d+)?)', content)
    unit_price = float(price_match.group(1)) if price_match else 0.0

    # Extract quantity
    quantity_match = re.search(r'Quantity: (\d+)', content)
    quantity = int(quantity_match.group(1)) if quantity_match else 0

    return {
        'employee_name': employee_name,
        'unit_price': unit_price,
        'quantity': quantity
    }

def generate_expenses_sql():
    """Generate SQL for brz_expenses table.

    Builds a script that drops and recreates brz_expenses, followed by one
    INSERT statement per receipt file found under
    finance/receipts_from_last_night (relative to the current working directory).

    Returns:
        The complete SQL script as a single string.
    """
    # Create table
    sql = """-- Create brz_expenses table
USE memory.default;
DROP TABLE IF EXISTS brz_expenses;
CREATE TABLE brz_expenses (
employee_full_name VARCHAR,
unit_price DECIMAL(8, 2),
quantity TINYINT
);

-- Insert data from receipts_from_last_night/*.txt files
"""

    # Get all receipt files. os.path.join keeps the pattern valid on every
    # platform; sorting makes the generated script reproducible between runs.
    receipt_files = sorted(glob.glob(os.path.join('finance', 'receipts_from_last_night', '*.txt')))

    # Process each file and generate INSERT statements
    inserts = []
    for file_path in receipt_files:
        expense_data = parse_expense_file(file_path)
        # Double single quotes so a name containing an apostrophe cannot end the SQL literal early.
        employee_name = expense_data['employee_name'].replace("'", "''")
        insert = f"INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES " \
                 f"('{employee_name}', {expense_data['unit_price']}, {expense_data['quantity']});"
        inserts.append(insert)

    return sql + "\n".join(inserts)

def main():
    """Generate the brz_expenses SQL script and write it to data_loading/output_queries."""
    # Generate SQL for expenses
    expenses_sql = generate_expenses_sql()

    # os.path.join instead of a backslash literal so the script also writes to
    # the intended folder on non-Windows systems.
    output_path = os.path.join('data_loading', 'output_queries', 'brz_expenses.sql')
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w') as file:
        file.write(expenses_sql)


if __name__ == "__main__":
    main()
64 changes: 64 additions & 0 deletions data_loading/invoices_data_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
import re
import glob
def parse_invoice_file(file_path):
    """Parse an invoice text file and extract relevant information.

    Args:
        file_path: Path of the invoice ``.txt`` file to read.

    Returns:
        dict with keys ``company_name`` (str exactly as written in the file,
        ``""`` when no "Company Name:" line is found), ``invoice_amount``
        (float, ``0.0`` when no "Invoice Amount:" line is found) and
        ``due_date_in_months`` (int, ``0`` when no "Due Date:" line is found).
    """
    with open(file_path, 'r') as file:
        content = file.read()

    # Extract company name. The name is returned unmodified; quoting for SQL is
    # the job of the code that builds the statement, not of the parser.
    company_name_match = re.search(r'Company Name: (.+)', content)
    company_name = company_name_match.group(1).strip() if company_name_match else ""

    # Extract invoice amount; the optional fractional part keeps amounts such
    # as 1500.50 from being truncated to 1500
    amount_match = re.search(r'Invoice Amount: (\d+(?:\.\d+)?)', content)
    invoice_amount = float(amount_match.group(1)) if amount_match else 0.0

    # Extract due date in months
    due_date_match = re.search(r'Due Date: (\d+) months', content)
    due_date_in_months = int(due_date_match.group(1)) if due_date_match else 0

    return {
        'company_name': company_name,
        'invoice_amount': invoice_amount,
        'due_date_in_months': due_date_in_months
    }


def generate_invoices_sql():
    """Generate SQL for brz_invoices table.

    Builds a script that drops and recreates brz_invoices, followed by one
    INSERT statement per invoice file found under finance/invoices_due
    (relative to the current working directory).

    Returns:
        The complete SQL script as a single string.
    """
    # Create table
    sql = """-- Create brz_invoices table
USE memory.default;
DROP TABLE IF EXISTS brz_invoices;
CREATE TABLE brz_invoices (
company_name VARCHAR(100),
invoice_amount DECIMAL(8, 2),
due_date_in_months INT
);

-- Insert data from invoices_due/*.txt files
"""

    # Get all invoice files. os.path.join keeps the pattern valid on every
    # platform; sorting makes the generated script reproducible between runs.
    invoice_files = sorted(glob.glob(os.path.join('finance', 'invoices_due', '*.txt')))

    # Process each file and generate INSERT statements
    inserts = []
    for file_path in invoice_files:
        invoice_data = parse_invoice_file(file_path)
        # Escape apostrophes the SQL way (doubling them) so "Dave's Discos" is
        # stored as written instead of being rewritten with a double quote.
        company_name = invoice_data['company_name'].replace("'", "''")
        insert = f"INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES " \
                 f"('{company_name}', {invoice_data['invoice_amount']}, {invoice_data['due_date_in_months']});"
        inserts.append(insert)

    return sql + "\n".join(inserts)

def main():
    """Generate the brz_invoices SQL script and write it to data_loading/output_queries."""
    # Generate SQL for invoices
    invoices_sql = generate_invoices_sql()

    # os.path.join instead of a backslash literal so the script also writes to
    # the intended folder on non-Windows systems.
    output_path = os.path.join('data_loading', 'output_queries', 'brz_invoices.sql')
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'w') as file:
        file.write(invoices_sql)


if __name__ == "__main__":
    main()
11 changes: 11 additions & 0 deletions data_loading/output_queries/brz_employees.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-- Insert data from CSV
-- Seed rows for the brz_employees staging table, one tuple per employee:
-- (employee_id, first_name, last_name, job_title, manager_id).
-- NOTE(review): no CREATE TABLE for brz_employees is visible next to this script — confirm the table exists before this runs.
-- NOTE(review): employee 1 reports to 4, 4 reports to 2 and 2 reports to 1, so the data contains a reporting loop — presumably intentional; confirm.
INSERT INTO brz_employees (employee_id, first_name, last_name, job_title, manager_id) VALUES
(1, 'Ian', 'James', 'CEO', 4),
(2, 'Umberto', 'Torrielli', 'CSO', 1),
(3, 'Alex', 'Jacobson', 'MD EMEA', 2),
(4, 'Darren', 'Poynton', 'CFO', 2),
(5, 'Tim', 'Beard', 'MD APAC', 2),
(6, 'Gemma', 'Dodd', 'COS', 1),
(7, 'Lisa', 'Platten', 'CHR', 6),
(8, 'Stefano', 'Camisaca', 'GM Activation', 2),
(9, 'Andrea', 'Ghibaudi', 'MD NAM', 2);
17 changes: 17 additions & 0 deletions data_loading/output_queries/brz_expenses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Create brz_expenses table
-- Staging table for expense receipts: one row per receipt, keyed by the
-- employee's full name as written on the receipt.
USE memory.default;
DROP TABLE IF EXISTS brz_expenses;
CREATE TABLE brz_expenses (
employee_full_name VARCHAR,
unit_price DECIMAL(8, 2),
quantity TINYINT
);

-- Insert data from receipts_from_last_night/*.txt files
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Alex Jacobson', 6.5, 14);
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Alex Jacobson', 11.0, 20);
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Alex Jacobson', 22.0, 18);
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Alex Jacobson', 13.0, 75);
-- NOTE(review): a unit_price of 0.0 is also what the receipt loader emits when it cannot parse the price — verify this row against the source receipt.
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Andrea Ghibaudi', 0.0, 1);
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Darren Poynton', 40.0, 9);
INSERT INTO brz_expenses (employee_full_name, unit_price, quantity) VALUES ('Umberto Torrielli', 17.5, 4);
16 changes: 16 additions & 0 deletions data_loading/output_queries/brz_invoices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
-- Create brz_invoices table
-- Staging table for supplier invoices: one row per invoice file, with the due
-- date expressed as a number of months from now.
USE memory.default;
DROP TABLE IF EXISTS brz_invoices;
CREATE TABLE brz_invoices (
company_name VARCHAR(100),
invoice_amount DECIMAL(8, 2),
due_date_in_months INT
);

-- Insert data from invoices_due/*.txt files
INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES ('Party Animals', 6000.0, 3);
INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES ('Catering Plus', 2000.0, 2);
INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES ('Catering Plus', 1500.0, 3);
-- The apostrophe is escaped by doubling it, so the stored name is Dave's Discos.
INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES ('Dave''s Discos', 500.0, 0);
INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES ('Entertainment tonight', 6000.0, 3);
INSERT INTO brz_invoices (company_name, invoice_amount, due_date_in_months) VALUES ('Ice Ice Baby', 4000.0, 6);
46 changes: 46 additions & 0 deletions find_manager_cycles.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
USE memory.default;

-- manager_cycles: one row per manager who shows up in their own chain of
-- subordinates, i.e. who is part of a reporting loop.
--   manager_id         the manager that closes the loop
--   employee_id_list   '; '-separated employee ids along the chain (greatest string when several chains exist)
--   subordinates_level greatest depth reached among that manager's looping chains
CREATE OR REPLACE VIEW manager_cycles AS (
SELECT
    manager_id
    , employee_id_list
    , subordinates_level
FROM (
    -- Create recursive MANAGERS_CTE with all the managers with their employee ID list.
    -- NOTE(review): Trino caps recursion depth (max_recursion_depth session property) —
    -- confirm the limit covers the deepest reporting chain.
    WITH RECURSIVE MANAGERS_CTE (
        manager_id
        , employee_id
        , employee_id_list
        , level
    ) AS (
        -- Anchor: every employee paired with their direct manager
        SELECT
            manager_id
            , employee_id
            , cast(employee_id AS VARCHAR) AS employee_id_list
            , 0 AS level
        FROM EMPLOYEE

        UNION ALL

        -- Recursive: extend each chain with the employees who report to the
        -- chain's last employee, keeping the manager the chain started from
        SELECT
            man.manager_id
            , emp.employee_id
            , concat(man.employee_id_list, '; ', cast(emp.employee_id AS VARCHAR)) AS employee_id_list
            , man.level + 1 AS level
        FROM EMPLOYEE AS emp
        -- Recursive over each employee if it is also a manager
        INNER JOIN MANAGERS_CTE AS man
            ON man.employee_id = emp.manager_id
        -- Avoid infinite recursion: stop once the employee is already in the chain.
        -- Both sides are wrapped in the '; ' ... ';' delimiters so that whole ids
        -- are compared (id 1 must not match inside 10 or 12).
        WHERE position(
            concat('; ', cast(emp.employee_id AS VARCHAR), ';')
            IN concat('; ', man.employee_id_list, ';')
        ) = 0
    )

    -- List of employees that form a managing loop.
    SELECT
        manager_id
        , max(employee_id_list) AS employee_id_list
        , max(level) AS subordinates_level
    FROM MANAGERS_CTE
    -- Filter for managers that are in their own recursive list of employees (indicating a loop),
    -- again matching whole ids between delimiters.
    WHERE position(
        concat('; ', cast(manager_id AS VARCHAR), ';')
        IN concat('; ', employee_id_list, ';')
    ) <> 0
    GROUP BY manager_id
)
);

SELECT
    manager_id
    , employee_id_list
    , subordinates_level
FROM manager_cycles
ORDER BY manager_id;
Loading