diff --git a/02_activities/assignments/Cohort_8/assignment2.sql b/02_activities/assignments/Cohort_8/assignment2.sql index 5ad40748a..c2743d3b7 100644 --- a/02_activities/assignments/Cohort_8/assignment2.sql +++ b/02_activities/assignments/Cohort_8/assignment2.sql @@ -11,14 +11,16 @@ SELECT product_name || ', ' || product_size|| ' (' || product_qty_type || ')' FROM product + But wait! The product table has some bad data (a few NULL values). -Find the NULLs and then using COALESCE, replace the NULL with a -blank for the first problem, and 'unit' for the second problem. +Find the NULLs and then using COALESCE, replace the NULL with a blank for the first column with +nulls, and 'unit' for the second column with nulls. -HINT: keep the syntax the same, but edited the correct components with the string. +**HINT**: keep the syntax the same, but edited the correct components with the string. The `||` values concatenate the columns into strings. Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. -All the other rows will remain the same.) */ +All the other rows will remain the same. 
*/ + diff --git a/02_activities/assignments/DC_Cohort/Assignment 1 - ERD Logical Map.png b/02_activities/assignments/DC_Cohort/Assignment 1 - ERD Logical Map.png new file mode 100644 index 000000000..5bfa33846 Binary files /dev/null and b/02_activities/assignments/DC_Cohort/Assignment 1 - ERD Logical Map.png differ diff --git a/02_activities/assignments/DC_Cohort/Assignment 2 - ERD Logical Map - Prompt 1.png b/02_activities/assignments/DC_Cohort/Assignment 2 - ERD Logical Map - Prompt 1.png new file mode 100644 index 000000000..589d42b81 Binary files /dev/null and b/02_activities/assignments/DC_Cohort/Assignment 2 - ERD Logical Map - Prompt 1.png differ diff --git a/02_activities/assignments/DC_Cohort/Assignment 2 - ERD Logical Map - Prompt 2.png b/02_activities/assignments/DC_Cohort/Assignment 2 - ERD Logical Map - Prompt 2.png new file mode 100644 index 000000000..9c407c3f1 Binary files /dev/null and b/02_activities/assignments/DC_Cohort/Assignment 2 - ERD Logical Map - Prompt 2.png differ diff --git a/02_activities/assignments/DC_Cohort/assignment2.sql b/02_activities/assignments/DC_Cohort/assignment2.sql index 5ad40748a..37c8df054 100644 --- a/02_activities/assignments/DC_Cohort/assignment2.sql +++ b/02_activities/assignments/DC_Cohort/assignment2.sql @@ -11,16 +11,21 @@ SELECT product_name || ', ' || product_size|| ' (' || product_qty_type || ')' FROM product + But wait! The product table has some bad data (a few NULL values). -Find the NULLs and then using COALESCE, replace the NULL with a -blank for the first problem, and 'unit' for the second problem. +Find the NULLs and then using COALESCE, replace the NULL with a blank for the first column with +nulls, and 'unit' for the second column with nulls. -HINT: keep the syntax the same, but edited the correct components with the string. +**HINT**: keep the syntax the same, but edited the correct components with the string. The `||` values concatenate the columns into strings. 
Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. -All the other rows will remain the same.) */ - +All the other rows will remain the same. */ +SELECT + product_name || ', ' || + COALESCE(product_size || ' ', '') || + '(' || COALESCE(product_qty_type, 'unit') || ')' AS product_description +FROM product; --Windowed Functions /* 1. Write a query that selects from the customer_purchases table and numbers each customer’s @@ -32,18 +37,36 @@ each new market date for each customer, or select only the unique market dates p (without purchase details) and number those visits. HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */ - +SELECT + customer_id, + market_date, + ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number +FROM customer_purchases +ORDER BY customer_id, market_date; /* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1, then write another query that uses this one as a subquery (or temp table) and filters the results to only the customer’s most recent visit. */ - +WITH ranked_visits AS ( + SELECT + customer_id, + market_date, + ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_rank + FROM customer_purchases +) +SELECT customer_id, market_date +FROM ranked_visits +WHERE visit_rank = 1 +ORDER BY customer_id; /* 3. Using a COUNT() window function, include a value along with each row of the customer_purchases table that indicates how many different times that customer has purchased that product_id. */ - +SELECT *, + COUNT(*) OVER (PARTITION BY customer_id, product_id) AS times_purchased_this_product +FROM customer_purchases +ORDER BY customer_id, product_id, market_date; -- String manipulations /* 1. Some product names in the product table have descriptions like "Jar" or "Organic". @@ -57,11 +80,17 @@ Remove any trailing or leading whitespaces. 
Don't just use a case statement for Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */ - +SELECT + product_name, + NULLIF(TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1)), '') AS description +FROM product +WHERE INSTR(product_name, '-') > 0; /* 2. Filter the query to show any product_size value that contain a number with REGEXP. */ - +SELECT * +FROM product +WHERE product_size REGEXP '[0-9]'; -- UNION /* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales. @@ -73,7 +102,32 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling 3) Query the second temp table twice, once for the best day, once for the worst day, with a UNION binding them. */ - +WITH daily_sales AS ( + SELECT + market_date, + SUM(quantity * cost_to_customer_per_qty) AS total_sales + FROM customer_purchases + GROUP BY market_date +), +ranked AS ( + SELECT + market_date, + total_sales, + RANK() OVER (ORDER BY total_sales DESC) AS best_rank, + RANK() OVER (ORDER BY total_sales ASC) AS worst_rank + FROM daily_sales +) +SELECT market_date, total_sales, 'Highest sales day' AS label +FROM ranked +WHERE best_rank = 1 + +UNION ALL + +SELECT market_date, total_sales, 'Lowest sales day' AS label +FROM ranked +WHERE worst_rank = 1 + +ORDER BY total_sales DESC; /* SECTION 3 */ @@ -89,7 +143,16 @@ Think a bit about the row counts: how many distinct vendors, product names are t How many customers are there (y). Before your final group by you should have the product of those two queries (x*y). */ - +SELECT + v.vendor_name, + p.product_name, + 5 * COUNT(DISTINCT c.customer_id) * vi.quantity * vi.original_price AS potential_revenue +FROM vendor_inventory vi +JOIN vendor v ON vi.vendor_id = v.vendor_id +JOIN product p ON vi.product_id = p.product_id +CROSS JOIN customer c +GROUP BY v.vendor_name, p.product_name +ORDER BY potential_revenue DESC; -- INSERT /*1. 
Create a new table "product_units". @@ -97,19 +160,32 @@ This table will contain only products where the `product_qty_type = 'unit'`. It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`. Name the timestamp column `snapshot_timestamp`. */ - +DROP TABLE IF EXISTS product_units; +CREATE TABLE product_units AS +SELECT *, CURRENT_TIMESTAMP AS snapshot_timestamp +FROM product +WHERE product_qty_type = 'unit'; /*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp). This can be any product you desire (e.g. add another record for Apple Pie). */ - +INSERT INTO product_units +SELECT *, CURRENT_TIMESTAMP +FROM product +WHERE product_name = 'Apple Pie'; -- DELETE /* 1. Delete the older record for the whatever product you added. HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/ - +DELETE FROM product_units +WHERE product_name = 'Apple Pie' + AND snapshot_timestamp = ( + SELECT MIN(snapshot_timestamp) + FROM product_units + WHERE product_name = 'Apple Pie' + ); -- UPDATE /* 1.We want to add the current_quantity to the product_units table. @@ -128,6 +204,15 @@ Finally, make sure you have a WHERE statement to update the right row, you'll need to use product_units.product_id to refer to the correct row within the product_units table. When you have all of these components, you can run the update statement. 
*/ +ALTER TABLE product_units ADD COLUMN current_quantity INT; +UPDATE product_units +SET current_quantity = COALESCE(( + SELECT quantity + FROM vendor_inventory vi + WHERE vi.product_id = product_units.product_id + ORDER BY market_date DESC, vendor_id + LIMIT 1 +), 0); diff --git a/03_instructional_team/sqbpro_originals/module_3.sqbpro b/03_instructional_team/sqbpro_originals/module_3.sqbpro index d3b452b67..73baca37e 100644 --- a/03_instructional_team/sqbpro_originals/module_3.sqbpro +++ b/03_instructional_team/sqbpro_originals/module_3.sqbpro @@ -105,19 +105,6 @@ Filter to number of purchases between 300 and 500 */ /* 2. What is the name of the vendor who sells pie */ -/* MODULE 3 */ -/* Common Table Expression (CTE) */ - - -/* 1. Calculate sales per vendor per day */ -SELECT - - - - - -/* ... re-aggregate the daily sales for each WEEK instead now */ - /* MODULE 3 */ /* Temp Tables */ @@ -142,6 +129,21 @@ CREATE TABLE temp.new_vendor_inventory AS /* 2. put the previous table into another temp table, e.g. as temp.new_new_vendor_inventory */ +/* MODULE 3 */ +/* Common Table Expression (CTE) */ + + +/* 1. Calculate sales per vendor per day */ +SELECT + + + + + +/* ... re-aggregate the daily sales for each WEEK instead now */ + + + /* MODULE 3 */ /* Date functions */ diff --git a/03_instructional_team/sqbpro_originals/module_5.sqbpro b/03_instructional_team/sqbpro_originals/module_5.sqbpro index f608313e0..21dc1f642 100644 --- a/03_instructional_team/sqbpro_originals/module_5.sqbpro +++ b/03_instructional_team/sqbpro_originals/module_5.sqbpro @@ -1,4 +1,4 @@ -
/* MODULE 5 */ +/* MODULE 5 */ /* INSERT UPDATE DELETE */ @@ -45,30 +45,18 @@ CREATE TEMP TABLE product_expanded AS -/* MODULE 5 */ -/* UPDATE statements for view */ +/* MODULE 5 */ +/* DYNAMIC VIEW */ -/* 1. SET market_date equal to today for new_customer_purchases */ -/* 2. Add today's info to the market_date_info -we need to add -1. today's date -2. today's day -3. today's week number -4. today's year -INSERT INTO market_date_info -VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); -*/ - -/* MODULE 5 */ -/* DYNAMIC VIEW */ +/* spoilers below */ @@ -78,7 +66,6 @@ VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Su -/* spoilers below */ @@ -87,24 +74,37 @@ VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Su +-- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING +-- 1) update new_customer_purchases to today +-- 2) add the union +-- 3) add the where statement +-- 4) update the market_date_info to include today +/* MODULE 5 */ +/* UPDATE statements for view */ +/* 1. SET market_date equal to today for new_customer_purchases */ --- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING --- 1) update new_customer_purchases to today --- 2) add the union --- 3) add the where statement --- 4) update the market_date_info to include today +/* 2. Add today's info to the market_date_info +we need to add +1. today's date +2. today's day +3. today's week number +4. 
today's year +INSERT INTO market_date_info +VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); +*/ + /* MODULE 5 */ /* CROSS JOIN */ diff --git a/04_this_cohort/custom_slides/markdown/slides_01.Rmd b/04_this_cohort/custom_slides/markdown/slides_01.Rmd index 08994c023..d9869e6b4 100644 --- a/04_this_cohort/custom_slides/markdown/slides_01.Rmd +++ b/04_this_cohort/custom_slides/markdown/slides_01.Rmd @@ -203,7 +203,7 @@ class: left, top, inverse ### Course Tools - [DB Browser for SQLite](https://sqlitebrowser.org/dl/): *Where we will write code* - [GitHub](https://github.com/UofT-DSI/sql): *Module Overview* -- [Etherpad](https://pad.riseup.net/p/SQL_DSI_SGS_Oct2025): *Where we will keep track of session progress* +- [Etherpad](https://pad.riseup.net/p/SQL_DSI_Nov2025): *Where we will keep track of session progress* - **Visit and complete the sign in prompt at the start every session** - [SQLite documentation](https://www.sqlite.org/index.html): *For SQL specific questions* - [DrawIO](https://www.drawio.com/) or [Lucid](https://www.lucidchart.com/pages/): *For Assignments* diff --git a/04_this_cohort/custom_slides/markdown/slides_01.html b/04_this_cohort/custom_slides/markdown/slides_01.html index dc99c1a5f..f032dd230 100644 --- a/04_this_cohort/custom_slides/markdown/slides_01.html +++ b/04_this_cohort/custom_slides/markdown/slides_01.html @@ -191,7 +191,7 @@ ### Course Tools - [DB Browser for SQLite](https://sqlitebrowser.org/dl/): *Where we will write code* - [GitHub](https://github.com/UofT-DSI/sql): *Module Overview* -- [Etherpad](https://pad.riseup.net/p/SQL_DSI_SGS_Oct2025): *Where we will keep track of session progress* +- [Etherpad](https://pad.riseup.net/p/SQL_DSI_Nov2025): *Where we will keep track of session progress* - **Visit and complete the sign in prompt at the start every session** - [SQLite documentation](https://www.sqlite.org/index.html): *For SQL specific questions* - 
[DrawIO](https://www.drawio.com/) or [Lucid](https://www.lucidchart.com/pages/): *For Assignments* diff --git a/04_this_cohort/custom_slides/pdf/slides_01.pdf b/04_this_cohort/custom_slides/pdf/slides_01.pdf index 9fd184207..bae468031 100644 Binary files a/04_this_cohort/custom_slides/pdf/slides_01.pdf and b/04_this_cohort/custom_slides/pdf/slides_01.pdf differ diff --git a/04_this_cohort/live_code/Cohort_8/module_2/CASE_WHEN.sql b/04_this_cohort/live_code/Cohort_8/module_2/CASE_WHEN.sql new file mode 100644 index 000000000..2d252f620 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/CASE_WHEN.sql @@ -0,0 +1,31 @@ +/* MODULE 2 */ +/* CASE */ + + +SELECT *, +/* 1. Add a CASE statement declaring which days vendors should come */ +CASE WHEN vendor_type = 'Fresh Focused' THEN 'Wednesday' + WHEN vendor_type = 'Prepared Foods' THEN 'Thursday' + ELSE 'Saturday' + END as day_of_specialty + +/* 2. Add another CASE statement for Pie Day */ +,CASE WHEN vendor_name = "Annie's Pies" -- double quotes will work just this once + THEN 'Annie is the best' + END AS annie_is_queen + + +/* 3. Add another CASE statement with an ELSE clause to handle rows evaluating to False */ +,CASE WHEN vendor_name LIKE '%pie%' +THEN 'Wednesday' +ELSE 'Friday' -- with this else, we get values for all the FALSE statements +END AS pie_day + + +/* 4. Experiment with selecting a different column instead of just a string value */ +,CASE WHEN vendor_type = 'Fresh Focused' THEN vendor_owner_first_name + WHEN vendor_type = 'Eggs & Meats' THEN vendor_owner_last_name + END as first_or_last + + +FROM vendor \ No newline at end of file diff --git a/04_this_cohort/live_code/Cohort_8/module_2/DISTINCT.sql b/04_this_cohort/live_code/Cohort_8/module_2/DISTINCT.sql new file mode 100644 index 000000000..394ff56bb --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/DISTINCT.sql @@ -0,0 +1,41 @@ +/* MODULE 2 */ +/* DISTINCT */ + + +/* 1. 
Compare how many customer_ids are the customer_purchases table, +one select with distinct, one without */ + +-- 4221 rows +SELECT customer_id FROM customer_purchases; + +-- 26 rows +SELECT DISTINCT customer_id FROM customer_purchases; + + +/* 2. Compare the difference between selecting market_day in market_date_info, with and without distinct: + what do these difference mean?*/ + + -- 150 rows...days that market was open + SELECT market_day FROM market_date_info; + +-- the market is only open on wed and saturday + SELECT DISTINCT market_day FROM market_date_info; + + +/* 3. Which vendor has sold products to a customer */ +SELECT DISTINCT vendor_id +FROM customer_purchases; -- 3rows + + +/* 4. Which vendor has sold products to a customer ... and which product was it */ +SELECT DISTINCT vendor_id, product_id +FROM customer_purchases; -- 8 rows + + +/* 5. Which vendor has sold products to a customer +... and which product was it? +... AND to whom was it sold*/ +SELECT DISTINCT vendor_id, product_id, customer_id +FROM customer_purchases +ORDER BY customer_id ASC, product_id DESC -- 200 rows + diff --git a/04_this_cohort/live_code/Cohort_8/module_2/INNER_JION.sql b/04_this_cohort/live_code/Cohort_8/module_2/INNER_JION.sql new file mode 100644 index 000000000..84361a090 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/INNER_JION.sql @@ -0,0 +1,38 @@ +/* MODULE 2 */ +/* INNER JOIN */ + + +/* 1. Get product names (from product table) alongside customer_purchases + ... use an INNER JOIN to see only products that have been purchased */ + +-- without table aliases +SELECT +product_name, -- coming from product TABLE +vendor_id, -- coming from the customer_purchase table (and all below) +market_date, +customer_id, +customer_purchases.product_id + +FROM product +INNER JOIN customer_purchases + ON product.product_id = customer_purchases.product_id; + + + +/* 2. 
Using the Query #5 from DISTINCT earlier + (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) + + Add customers' first and last names with an INNER JOIN */ + +-- using table aliases +SELECT DISTINCT +vendor_id, + product_id, + c.customer_id, -- from the customer table! + customer_first_name, + customer_last_name + + FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + diff --git a/04_this_cohort/live_code/Cohort_8/module_2/LEFT_JOIN.sql b/04_this_cohort/live_code/Cohort_8/module_2/LEFT_JOIN.sql new file mode 100644 index 000000000..9008984df --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/LEFT_JOIN.sql @@ -0,0 +1,64 @@ +/* MODULE 2 */ +/* LEFT JOIN */ + + +/* 1. There are products that have been bought +... but are there products that have not been bought? +Use a LEFT JOIN to find out*/ +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], +product_name + +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id + +WHERE cp.product_id IS NULL; -- only shows the product_ids that have been sold + + +/* 2. Directions of LEFT JOINs matter ...*/ +-- this shows only products that have been sold...no products in cp that aren't in product table +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], +product_name + +FROM customer_purchases as cp +LEFT JOIN product as p + ON p.product_id = cp.product_id; + + + +/* 3. As do which values you filter on ... */ +SELECT DISTINCT +pc.product_category_id +,p.product_category_id as [product_product_category_id] + +FROM product_category as pc +LEFT JOIN product as p + ON pc.product_category_id = p.product_category_id + +--WHERE pc.product_category_id BETWEEN 1 AND 6 -- 6 rows +WHERE p.product_category_id BETWEEN 1 AND 6 -- 5 rows + + +/* 4. Without using a RIGHT JOIN, make this query return the RIGHT JOIN result set +...**Hint, flip the order of the joins** ... 
+ +SELECT * + +FROM product_category AS pc +LEFT JOIN product AS p + ON pc.product_category_id = p.product_category_id + ORDER by pc.product_category_id + +...Note how the row count changed from 24 to 23 +*/ +SELECT * + +FROM product AS p +LEFT JOIN product_category AS pc + ON pc.product_category_id = p.product_category_id + ORDER by pc.product_category_id + diff --git a/04_this_cohort/live_code/Cohort_8/module_2/SELECT.sql b/04_this_cohort/live_code/Cohort_8/module_2/SELECT.sql new file mode 100644 index 000000000..50519e65c --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/SELECT.sql @@ -0,0 +1,34 @@ +/* MODULE 2 */ +/* SELECT */ + + +/* 1. Select everything in the customer table */ +SELECT * FROM customer; + + +/* 2. Use sql as a calculator */ +SELECT 1+1 as [2], pi() as [pi]; + + +/* 3. Add order by and limit clauses */ +SELECT * FROM customer +ORDER BY customer_last_name DESC -- z->a +LIMIT 5; -- only 5 rows + + + +/* 4. Select multiple specific columns */ +SELECT product_name, +product_size, +product_qty_type + +FROM product; + + +/* 5. Add a static value in a column */ +SELECT '2025' as [this_year], -- persist the value 2025 across all rows that we returnm a static value +vendor_name, +vendor_type + +FROM vendor; + diff --git a/04_this_cohort/live_code/Cohort_8/module_2/WHERE.sql b/04_this_cohort/live_code/Cohort_8/module_2/WHERE.sql new file mode 100644 index 000000000..eb74c91b0 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/WHERE.sql @@ -0,0 +1,42 @@ +/* MODULE 2 */ +/* WHERE */ + +/* 1. Select only customer 1 from the customer table */ +SELECT * +FROM customer +WHERE customer_id = 1; + + +/* 2. Differentiate between AND and OR */ +SELECT * +FROM customer +WHERE customer_id = 1 +OR customer_id = 2; -- 1 or 2 +--AND customer_id -- returns nothing + + +/* 3. IN */ +SELECT * FROM customer +WHERE customer_id IN (3,4,5,6); -- only customers 3,4,5,6 + + +/* 4. 
LIKE */ +SELECT * +FROM product +WHERE product_name LIKE '%pepper%'; + +SELECT * +FROM customer +WHERE customer_last_name LIKE 'a%'; -- starting with a + + +/* 5. Nulls and Blanks*/ +SELECT * +FROM product +WHERE product_size IS NULL -- missing, null VALUES +OR product_size = ''; -- two single quotes '' not a double quote " + + +/* 6. BETWEEN x AND y */ +SELECT * FROM market_date_info +WHERE market_date BETWEEN '2022-03-01' AND '2022-05-31' -- works well with dates or numbers! diff --git a/04_this_cohort/live_code/Cohort_8/module_2/module_2.sqbpro b/04_this_cohort/live_code/Cohort_8/module_2/module_2.sqbpro index 55ab8821e..0e455ed5a 100644 --- a/04_this_cohort/live_code/Cohort_8/module_2/module_2.sqbpro +++ b/04_this_cohort/live_code/Cohort_8/module_2/module_2.sqbpro @@ -1,95 +1,151 @@ -
/* MODULE 2 */ +/* MODULE 2 */ /* SELECT */ /* 1. Select everything in the customer table */ -SELECT +SELECT * FROM customer; -/* 2. Use sql as a calculator */ +/* 2. Use sql as a calculator */ +SELECT 1+1 as [2], pi() as [pi]; /* 3. Add order by and limit clauses */ +SELECT * FROM customer +ORDER BY customer_last_name DESC -- z->a +LIMIT 5; -- only 5 rows /* 4. Select multiple specific columns */ +SELECT product_name, +product_size, +product_qty_type + +FROM product; + +/* 5. Add a static value in a column */ +SELECT '2025' as [this_year], -- persist the value 2025 across all rows that we returnm a static value +vendor_name, +vendor_type +FROM vendor; -/* 5. Add a static value in a column *//* MODULE 2 */ +/* MODULE 2 */ /* WHERE */ /* 1. Select only customer 1 from the customer table */ SELECT * FROM customer -WHERE +WHERE customer_id = 1; /* 2. Differentiate between AND and OR */ - +SELECT * +FROM customer +WHERE customer_id = 1 +OR customer_id = 2; -- 1 or 2 +--AND customer_id -- returns nothing /* 3. IN */ - +SELECT * FROM customer +WHERE customer_id IN (3,4,5,6); -- only customers 3,4,5,6 /* 4. LIKE */ +SELECT * +FROM product +WHERE product_name LIKE '%pepper%'; +SELECT * +FROM customer +WHERE customer_last_name LIKE 'a%'; -- starting with a /* 5. Nulls and Blanks*/ +SELECT * +FROM product +WHERE product_size IS NULL -- missing, null VALUES +OR product_size = ''; -- two single quotes '' not a double quote " - -/* 6. BETWEEN x AND y *//* MODULE 2 */ +/* 6. BETWEEN x AND y */ +SELECT * FROM market_date_info +WHERE market_date BETWEEN '2022-03-01' AND '2022-05-31' -- works well with dates or numbers! +/* MODULE 2 */ /* CASE */ -SELECT * -/* 1. Add a CASE statement declaring which days vendors should come */ - +SELECT *, +/* 1. Add a CASE statement declaring which days vendors should come */ +CASE WHEN vendor_type = 'Fresh Focused' THEN 'Wednesday' + WHEN vendor_type = 'Prepared Foods' THEN 'Thursday' + ELSE 'Saturday' + END as day_of_specialty /* 2. 
Add another CASE statement for Pie Day */ - +,CASE WHEN vendor_name = "Annie's Pies" -- double quotes will work just this once + THEN 'Annie is the best' + END AS annie_is_queen /* 3. Add another CASE statement with an ELSE clause to handle rows evaluating to False */ - +,CASE WHEN vendor_name LIKE '%pie%' +THEN 'Wednesday' +ELSE 'Friday' -- with this else, we get values for all the FALSE statements +END AS pie_day /* 4. Experiment with selecting a different column instead of just a string value */ +,CASE WHEN vendor_type = 'Fresh Focused' THEN vendor_owner_first_name + WHEN vendor_type = 'Eggs & Meats' THEN vendor_owner_last_name + END as first_or_last -FROM vendor/* MODULE 2 */ +FROM vendor/* MODULE 2 */ /* DISTINCT */ -/* 1. Compare how many customer_ids are the customer_purchases table, one select with distinct, one without */ +/* 1. Compare how many customer_ids are the customer_purchases table, +one select with distinct, one without */ -- 4221 rows -SELECT customer_id FROM customer_purchases +SELECT customer_id FROM customer_purchases; +-- 26 rows +SELECT DISTINCT customer_id FROM customer_purchases; /* 2. Compare the difference between selecting market_day in market_date_info, with and without distinct: what do these difference mean?*/ - + + -- 150 rows...days that market was open + SELECT market_day FROM market_date_info; + +-- the market is only open on wed and saturday + SELECT DISTINCT market_day FROM market_date_info; /* 3. Which vendor has sold products to a customer */ - +SELECT DISTINCT vendor_id +FROM customer_purchases; -- 3rows /* 4. Which vendor has sold products to a customer ... and which product was it */ - +SELECT DISTINCT vendor_id, product_id +FROM customer_purchases; -- 8 rows /* 5. Which vendor has sold products to a customer ... and which product was it? ... 
AND to whom was it sold*/ +SELECT DISTINCT vendor_id, product_id, customer_id +FROM customer_purchases +ORDER BY customer_id ASC, product_id DESC -- 200 rows -/* MODULE 2 */ +/* MODULE 2 */ /* INNER JOIN */ @@ -97,35 +153,79 @@ SELECT customer_id FROM customer_purchases ... use an INNER JOIN to see only products that have been purchased */ -- without table aliases +SELECT +product_name, -- coming from product TABLE +vendor_id, -- coming from the customer_purchase table (and all below) +market_date, +customer_id, +customer_purchases.product_id +FROM product +INNER JOIN customer_purchases + ON product.product_id = customer_purchases.product_id; -/* 2. Using the Query #4 from DISTINCT earlier +/* 2. Using the Query #5 from DISTINCT earlier (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) Add customers' first and last names with an INNER JOIN */ -- using table aliases - - -/* MODULE 2 */ +SELECT DISTINCT +vendor_id, + product_id, + c.customer_id, -- from the customer table! + customer_first_name, + customer_last_name + + FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + +/* MODULE 2 */ /* LEFT JOIN */ /* 1. There are products that have been bought ... but are there products that have not been bought? Use a LEFT JOIN to find out*/ +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], +product_name + +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id + +WHERE cp.product_id IS NULL; -- only shows the product_ids that have been sold /* 2. Directions of LEFT JOINs matter ...*/ +-- this shows only products that have been sold...no products in cp that aren't in product table +SELECT DISTINCT +p.product_id, +cp.product_id as [cp.product_id], +product_name +FROM customer_purchases as cp +LEFT JOIN product as p + ON p.product_id = cp.product_id; /* 3. As do which values you filter on ... 
*/ +SELECT DISTINCT +pc.product_category_id +,p.product_category_id as [product_product_category_id] +FROM product_category as pc +LEFT JOIN product as p + ON pc.product_category_id = p.product_category_id +--WHERE pc.product_category_id BETWEEN 1 AND 6 -- 6 rows +WHERE p.product_category_id BETWEEN 1 AND 6 -- 5 rows /* 4. Without using a RIGHT JOIN, make this query return the RIGHT JOIN result set @@ -140,15 +240,37 @@ LEFT JOIN product AS p ...Note how the row count changed from 24 to 23 */ +SELECT * -/* MODULE 2 */ +FROM product AS p +LEFT JOIN product_category AS pc + ON pc.product_category_id = p.product_category_id + ORDER by pc.product_category_id + +/* MODULE 2 */ /* Multiple Table JOINs */ -/* 1. Using the Query #4 from DISTINCT earlier +/* 1. Using the Query #5 from DISTINCT earlier (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) Replace all the IDs (customer, vendor, and product) with the names instead*/ +SELECT DISTINCT +--vendor_id, +vendor_name, +--product_id, +product_name, +--customer_id +customer_first_name, +customer_last_name + +FROM customer_purchases as cp +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id +INNER JOIN customer as c + ON c.customer_id = cp.customer_id; @@ -156,5 +278,20 @@ LEFT JOIN product AS p ... how does this LEFT JOIN affect the number of rows? 
Why do we have more rows now?*/ +SELECT +product_category_name +,p.* +,cp.product_id as [cp.product_id] + +FROM product_category as pc +INNER JOIN product as p + ON p.product_category_id = pc.product_category_id +LEFT JOIN customer_purchases as cp -- inner join is 4221, but left join adds 15 rows (4236 rows) for unsold products + ON cp.product_id = p.product_id + +ORDER BY cp.product_id + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_2/multiple_table_joins.sql b/04_this_cohort/live_code/Cohort_8/module_2/multiple_table_joins.sql new file mode 100644 index 000000000..b03d2cc25 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_2/multiple_table_joins.sql @@ -0,0 +1,47 @@ +/* MODULE 2 */ +/* Multiple Table JOINs */ + + +/* 1. Using the Query #5 from DISTINCT earlier + (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) + + Replace all the IDs (customer, vendor, and product) with the names instead*/ +SELECT DISTINCT +--vendor_id, +vendor_name, +--product_id, +product_name, +--customer_id +customer_first_name, +customer_last_name + +FROM customer_purchases as cp +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id +INNER JOIN customer as c + ON c.customer_id = cp.customer_id; + + + +/* 2. Select product_category_name, everything from the product table, and then LEFT JOIN the customer_purchases table +... how does this LEFT JOIN affect the number of rows? 
+ +Why do we have more rows now?*/ +SELECT +product_category_name +,p.* +,cp.product_id as [cp.product_id] + +FROM product_category as pc +INNER JOIN product as p + ON p.product_category_id = pc.product_category_id +LEFT JOIN customer_purchases as cp -- inner join is 4221, but left join adds 15 rows (4236 rows) for unsold products + ON cp.product_id = p.product_id + +ORDER BY cp.product_id + + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/COUNT.sql b/04_this_cohort/live_code/Cohort_8/module_3/COUNT.sql new file mode 100644 index 000000000..8e026a546 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/COUNT.sql @@ -0,0 +1,32 @@ +/* MODULE 3 */ +/* COUNT */ + + +/* 1. Count the number of products */ + SELECT COUNT(product_id) as num_of_prods + FROM product; + + +/* 2. How many products per/by product_qty_type */ +SELECT product_qty_type +,COUNT(product_id) as num_of_prods +FROM product +GROUP BY product_qty_type; + + +/* 3. How many products per product_qty_type and per their product_size */ +SELECT product_size +,product_qty_type +,COUNT(product_id) as num_of_prods + +FROM product +GROUP BY product_size, product_qty_type; + + +/* COUNT DISTINCT + 4. How many unique products were bought */ + + SELECT count(DISTINCT product_id) as bought_products + FROM customer_purchases; + + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/CTE.sql b/04_this_cohort/live_code/Cohort_8/module_3/CTE.sql new file mode 100644 index 000000000..3a31ec9c0 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/CTE.sql @@ -0,0 +1,43 @@ +/* MODULE 3 */ +/* Common Table Expression (CTE) */ + + +/* 1. 
Calculate sales per vendor per day */ + +WITH vendor_daily_sales AS ( + SELECT + md.market_date, + market_day, + market_week, + market_year, + vendor_name, + SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM customer_purchases cp + INNER JOIN market_date_info md + ON cp.market_date = md.market_date + INNER JOIN vendor v + ON v.vendor_id = cp.vendor_id + + GROUP BY md.market_date, v.vendor_id +), + + -- if we want another CTE .... add a comma but not another WITH +new_customer_result AS ( + SELECT * FROM customer +) + + +/* ... re-aggregate the daily sales for each WEEK instead now */ +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) + +FROM vendor_daily_sales + +GROUP BY market_year, market_week, vendor_name + + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/DATES.sql b/04_this_cohort/live_code/Cohort_8/module_3/DATES.sql new file mode 100644 index 000000000..5a4e69c01 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/DATES.sql @@ -0,0 +1,35 @@ +/* MODULE 3 */ +/* Date functions */ + + +/* 1. now */ +SELECT DISTINCT +DATE('now') as [now] +,DATETIME() as [right_now] +,datetime('now','localtime') as [right_now_timezone] + +/* 2. strftime */ +,strftime('%Y-%m','now') as this_year_this_month +,strftime('%Y/%m/%d', '2025-08-10','+50 days') as the_future +,market_date +,strftime('%m-%d-%Y', market_date, '+30 days','-1 year') as the_past + +/* 3. adding dates, e.g. last date of the month */ +-- last date of the month +,DATE(market_date,'start of month','-1 day') as end_of_previous_month +,DATE(market_date, 'start of month','-1 day','start of month') as start_of_previous_month + + +/* 4. difference between dates, + a. number of days between now and each market_date + b. number of YEARS between now and market_date + c. 
number of HOURS between now and market_date + */ + ,market_date + ,julianday('now') - julianday(market_date) as now_md_dd + ,(julianday('now') - julianday(market_date)) / 365.25 as now_md_dd_yrs + ,(julianday('now') - julianday(market_date)) * 24 as now_md_dd_hours + + FROM market_date_info + + \ No newline at end of file diff --git a/04_this_cohort/live_code/Cohort_8/module_3/HAVING.sql b/04_this_cohort/live_code/Cohort_8/module_3/HAVING.sql new file mode 100644 index 000000000..a477acd74 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/HAVING.sql @@ -0,0 +1,33 @@ +/* MODULE 3 */ +/* HAVING */ + + +/* 1. How much did a customer spend on each day? +Filter to customer_id between 1 and 5 and total_cost > 50 +... What order of execution occurs?*/ + +SELECT -- fifth +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_spend + +FROM customer_purchases -- first +WHERE customer_id BETWEEN 1 AND 5 --filtering the non-aggregated values, second + +GROUP BY market_date, customer_id -- third +HAVING total_spend > 50; -- filtering the aggregated values (total spend), fourth + + + +/* 2. How many products were bought? +Filter to number of purchases between 300 and 500 */ +SELECT +count(product_id) as number_of_products +,product_id + +FROM customer_purchases + +GROUP BY product_id +HAVING count(product_id) BETWEEN 300 AND 500 + + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/MIN_MAX.sql b/04_this_cohort/live_code/Cohort_8/module_3/MIN_MAX.sql new file mode 100644 index 000000000..11a34b000 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/MIN_MAX.sql @@ -0,0 +1,60 @@ +/* MODULE 3 */ +/* MIN & MAX */ + + +/* 1. What is the most expensive product +...pay attention to how it doesn't handle ties very well +*/ +SELECT +product_name +,max(original_price) as most_expensive + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id; + + +/* 2. 
Prove that max is working */ +SELECT DISTINCT +product_name +,original_price + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +ORDER BY original_price DESC; + + +/* 3. Find the minimum price per each product_qty_type */ +SELECT +product_name +,product_qty_type +,min(original_price) as least_expensive + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +GROUP BY product_qty_type + +ORDER BY product_qty_type ASC, original_price ASC; + + +/* 4. Prove that min is working */ +SELECT DISTINCT +product_name +,product_qty_type +,original_price + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id + +ORDER BY product_qty_type ASC, original_price ASC; + + +/* 5. Min/max on a string +... not particularly useful? */ +SELECT min(customer_first_name) +FROM customer + + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/SUM_AVG.sql b/04_this_cohort/live_code/Cohort_8/module_3/SUM_AVG.sql new file mode 100644 index 000000000..69eb78e21 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/SUM_AVG.sql @@ -0,0 +1,27 @@ +/* MODULE 3 */ +/* SUM & AVG */ + + +/* 1. How much did customers spend each day */ +SELECT +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_cost + +FROM customer_purchases +GROUP BY market_date, customer_id; + + +/* 2. 
How much does each customer spend on average */ +SELECT +customer_first_name +,customer_last_name +,customer_postal_code +,ROUND(AVG(quantity*cost_to_customer_per_qty),2) as total_cost + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + +GROUP BY c.customer_id -- this represents the single row that customer_first and customer_last_name are using + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/Temp_Tables.sql b/04_this_cohort/live_code/Cohort_8/module_3/Temp_Tables.sql new file mode 100644 index 000000000..949527bfe --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/Temp_Tables.sql @@ -0,0 +1,36 @@ +/* MODULE 3 */ +/* Temp Tables */ + + +/* 1. Put our inflation query into a temp table, e.g. as temp.new_vendor_inventory*/ + +/* some structural code */ +/* ...heads up, sometimes this query can be finnicky -- it's good to try highlighting different sections to help it succeed...*/ + +-- if a table named new_vendor_inventory exists, delete it, other do NOTHING +DROP TABLE IF EXISTS temp.new_vendor_inventory; + +--make the table +CREATE TABLE temp.new_vendor_inventory AS + +-- definition of the table +SELECT * +,original_price * 5 as inflation +FROM vendor_inventory; + +SELECT * FROM new_vendor_inventory; + + + +/* 2. put the previous table into another temp table, e.g. as temp.new_new_vendor_inventory */ + +DROP TABLE IF EXISTS temp.new_new_vendor_inventory; + +CREATE TABLE temp.new_new_vendor_inventory AS +SELECT * +,inflation*2 as super_inflation + +FROM new_vendor_inventory; + +SELECT * FROM new_new_vendor_inventory + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/arithmitic.sql b/04_this_cohort/live_code/Cohort_8/module_3/arithmitic.sql new file mode 100644 index 000000000..d08502b16 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/arithmitic.sql @@ -0,0 +1,24 @@ +/* MODULE 3 */ +/* Arithmitic */ + + +/* 1. 
power, pi(), ceiling, division, integer division, etc */ +SELECT +power(2,3) as [power] +,pi() as [pi] +,10.0 / 3.0 as division +,cast(10.0 as INT) / cast(3.0 as int) as integer_division +,ceiling(4.5) as [ceiling]; + +/* 2. Every even vendor_id with modulo */ +SELECT * + +FROM vendor +WHERE vendor_id % 2 = 0; + + +/* 3. What about every third? */ +SELECT * + +FROM vendor +WHERE vendor_id % 3 = 1; diff --git a/04_this_cohort/live_code/Cohort_8/module_3/module_3.sqbpro b/04_this_cohort/live_code/Cohort_8/module_3/module_3.sqbpro index d3b452b67..f73885a62 100644 --- a/04_this_cohort/live_code/Cohort_8/module_3/module_3.sqbpro +++ b/04_this_cohort/live_code/Cohort_8/module_3/module_3.sqbpro @@ -1,124 +1,257 @@ -
/* MODULE 3 */ +/* MODULE 3 */ /* COUNT */ /* 1. Count the number of products */ - + SELECT COUNT(product_id) as num_of_prods + FROM product; -/* 2. How many products per product_qty_type */ - +/* 2. How many products per/by product_qty_type */ +SELECT product_qty_type +,COUNT(product_id) as num_of_prods +FROM product +GROUP BY product_qty_type; /* 3. How many products per product_qty_type and per their product_size */ +SELECT product_size +,product_qty_type +,COUNT(product_id) as num_of_prods +FROM product +GROUP BY product_size, product_qty_type; /* COUNT DISTINCT 4. How many unique products were bought */ + + SELECT count(DISTINCT product_id) as bought_products + FROM customer_purchases; -/* MODULE 3 */ +/* MODULE 3 */ /* SUM & AVG */ /* 1. How much did customers spend each day */ +SELECT +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_cost +FROM customer_purchases +GROUP BY market_date, customer_id; /* 2. How much does each customer spend on average */ +SELECT +customer_first_name +,customer_last_name +,customer_postal_code +,ROUND(AVG(quantity*cost_to_customer_per_qty),2) as total_cost + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +GROUP BY c.customer_id -- this represents the single row that customer_first and customer_last_name are using -/* MODULE 3 */ +/* MODULE 3 */ /* MIN & MAX */ /* 1. What is the most expensive product ...pay attention to how it doesn't handle ties very well */ +SELECT +product_name +,max(original_price) as most_expensive + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id; /* 2. Prove that max is working */ +SELECT DISTINCT +product_name +,original_price +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +ORDER BY original_price DESC; /* 3. 
Find the minimum price per each product_qty_type */ +SELECT +product_name +,product_qty_type +,min(original_price) as least_expensive +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +GROUP BY product_qty_type + +ORDER BY product_qty_type ASC, original_price ASC; /* 4. Prove that min is working */ +SELECT DISTINCT +product_name +,product_qty_type +,original_price + +FROM product p +INNER JOIN vendor_inventory vi + ON p.product_id = vi.product_id +ORDER BY product_qty_type ASC, original_price ASC; /* 5. Min/max on a string ... not particularly useful? */ +SELECT min(customer_first_name) +FROM customer -/* MODULE 3 */ +/* MODULE 3 */ /* Arithmitic */ /* 1. power, pi(), ceiling, division, integer division, etc */ SELECT - +power(2,3) as [power] +,pi() as [pi] +,10.0 / 3.0 as division +,cast(10.0 as INT) / cast(3.0 as int) as integer_division +,ceiling(4.5) as [ceilign]; /* 2. Every even vendor_id with modulo */ +SELECT * +FROM vendor +WHERE vendor_id % 2 = 0; /* 3. What about every third? */ +SELECT * -/* MODULE 3 */ +FROM vendor +WHERE vendor_id % 3 = 1; +/* MODULE 3 */ /* HAVING */ /* 1. How much did a customer spend on each day? Filter to customer_id between 1 and 5 and total_cost > 50 ... What order of execution occurs?*/ + +SELECT -- fifth +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_spend + +FROM customer_purchases -- first +WHERE customer_id BETWEEN 1 AND 5 --filtering the non-aggregated values, second + +GROUP BY market_date, customer_id -- third +HAVING total_spend > 50; -- filtering the aggreated values (total spend), fourth /* 2. How many products were bought? Filter to number of purchases between 300 and 500 */ +SELECT +count(product_id) as number_of_products +,product_id -/* MODULE 3 */ +FROM customer_purchases + +GROUP BY product_id +HAVING count(product_id) BETWEEN 300 AND 500 + + +/* MODULE 3 */ /* Subquery FROM */ /*1. Simple subquery in a FROM statement, e.g. 
for inflation ...we could imagine joining this to a more complex query perhaps */ +SELECT DISTINCT product_id, inflation + +FROM ( + SELECT product_id + ,cost_to_customer_per_qty + ,CASE + WHEN cost_to_customer_per_qty < '1.00' + THEN cost_to_customer_per_qty*5 + ELSE cost_to_customer_per_qty END as inflation + + FROM customer_purchases +); /* 2. What is the single item that has been bought in the greatest quantity?*/ +SELECT product_name +,max(quantity_purchased) +FROM product p +INNER JOIN ( + SELECT product_id + ,COUNT(quantity) as quantity_purchased + + FROM customer_purchases + GROUP BY product_id -/* MODULE 3 */ +) x ON p.product_id = x.product_id + +/* MODULE 3 */ /* Subquery WHERE */ /* 1. How much did each customer spend at each vendor for each day at the market WHEN IT RAINS */ - +SELECT +market_date +,customer_id +,vendor_id +,SUM(quantity*cost_to_customer_per_qty) as total_cost + +FROM customer_purchases +WHERE market_date IN + ( + SELECT market_date + FROM market_date_info + WHERE market_rain_flag = 1 + ) + +GROUP BY +market_date +,customer_id +,vendor_id; /* 2. What is the name of the vendor who sells pie */ +SELECT DISTINCT vendor_name -/* MODULE 3 */ -/* Common Table Expression (CTE) */ - - -/* 1. Calculate sales per vendor per day */ -SELECT - +FROM vendor v +INNER JOIN vendor_inventory vi + ON v.vendor_id = vi.vendor_id +WHERE product_id IN ( + SELECT product_id + FROM product + WHERE product_name LIKE '%pie%' +) -/* ... re-aggregate the daily sales for each WEEK instead now */ - -/* MODULE 3 */ +/* MODULE 3 */ /* Temp Tables */ @@ -134,28 +267,89 @@ DROP TABLE IF EXISTS temp.new_vendor_inventory; CREATE TABLE temp.new_vendor_inventory AS -- definition of the table +SELECT * +,original_price * 5 as inflation +FROM vendor_inventory; - +SELECT * FROM new_vendor_inventory; /* 2. put the previous table into another temp table, e.g. 
as temp.new_new_vendor_inventory */ +DROP TABLE IF EXISTS temp.new_new_vendor_inventory; + +CREATE TABLE temp.new_new_vendor_inventory AS +SELECT * +,inflation*2 as super_inflation + +FROM new_vendor_inventory; + +SELECT * FROM new_new_vendor_inventory + +/* MODULE 3 */ +/* Common Table Expression (CTE) */ + + +/* 1. Calculate sales per vendor per day */ + +WITH vendor_daily_sales AS ( + SELECT + md.market_date, + market_day, + market_week, + market_year, + vendor_name, + SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM customer_purchases cp + INNER JOIN market_date_info md + ON cp.market_date = md.market_date + INNER JOIN vendor v + ON v.vendor_id = cp.vendor_id + + GROUP BY md.market_date, v.vendor_id +), + + -- if we want another CTE .... add a comma but not another WITH +new_customer_result AS ( + SELECT * FROM customer +) + + +/* ... re-aggregate the daily sales for each WEEK instead now */ +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) + +FROM vendor_daily_sales + +GROUP BY market_year, market_week, vendor_name + /* MODULE 3 */ /* Date functions */ /* 1. now */ -SELECT - +SELECT DISTINCT +DATE('now') as [now] +,DATETIME() as [right_now] +,datetime('now','localtime') as [right_now_timezone] /* 2. strftime */ - - +,strftime('%Y-%m','now') as this_year_this_month +,strftime('%Y/%m/%d', '2025-08-10','+50 days') as the_future +,market_date +,strftime('%m-%d-%Y', market_date, '+30 days','-1 year') as the_past /* 3. adding dates, e.g. last date of the month */ - +-- last date of the month +,DATE(market_date,'start of month','-1 day') as end_of_previous_month +,DATE(market_date, 'start of month','-1 day','start of month') as start_of_previous_month /* 4. difference between dates, @@ -163,4 +357,11 @@ SELECT b. number of YEARS between now and market_date c. 
number of HOURS between now and market_date */ + ,market_date + ,julianday('now') - julianday(market_date) as now_md_dd + ,(julianday('now') - julianday(market_date)) / 365.25 as now_md_dd_yrs + ,(julianday('now') - julianday(market_date)) * 24 as now_md_dd_hours + + FROM market_date_info + + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/subquery_FROM.sql b/04_this_cohort/live_code/Cohort_8/module_3/subquery_FROM.sql new file mode 100644 index 000000000..d544b52a6 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/subquery_FROM.sql @@ -0,0 +1,36 @@ +/* MODULE 3 */ +/* Subquery FROM */ + + +/*1. Simple subquery in a FROM statement, e.g. for inflation +...we could imagine joining this to a more complex query perhaps */ + +SELECT DISTINCT product_id, inflation + +FROM ( + + SELECT product_id + ,cost_to_customer_per_qty + ,CASE + WHEN cost_to_customer_per_qty < '1.00' + THEN cost_to_customer_per_qty*5 + ELSE cost_to_customer_per_qty END as inflation + + FROM customer_purchases +); + + +/* 2. What is the single item that has been bought in the greatest quantity?*/ +SELECT product_name +,max(quantity_purchased) + +FROM product p +INNER JOIN ( + SELECT product_id + ,COUNT(quantity) as quantity_purchased + + FROM customer_purchases + GROUP BY product_id + +) x ON p.product_id = x.product_id + diff --git a/04_this_cohort/live_code/Cohort_8/module_3/subquery_WHERE.sql b/04_this_cohort/live_code/Cohort_8/module_3/subquery_WHERE.sql new file mode 100644 index 000000000..71c0253bc --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_3/subquery_WHERE.sql @@ -0,0 +1,41 @@ +/* MODULE 3 */ +/* Subquery WHERE */ + + +/* 1. 
How much did each customer spend at each vendor for each day at the market WHEN IT RAINS */ + +SELECT +market_date +,customer_id +,vendor_id +,SUM(quantity*cost_to_customer_per_qty) as total_cost + +FROM customer_purchases +WHERE market_date IN + ( + SELECT market_date + FROM market_date_info + WHERE market_rain_flag = 1 + ) + +GROUP BY +market_date +,customer_id +,vendor_id; + + +/* 2. What is the name of the vendor who sells pie */ +SELECT DISTINCT vendor_name + +FROM vendor v +INNER JOIN vendor_inventory vi + ON v.vendor_id = vi.vendor_id + +WHERE product_id IN ( + SELECT product_id + FROM product + WHERE product_name LIKE '%pie%' +) + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_4/FULL_OUTER_JOIN_FROM_UNION.sql b/04_this_cohort/live_code/Cohort_8/module_4/FULL_OUTER_JOIN_FROM_UNION.sql new file mode 100644 index 000000000..1d0bdfbfc --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/FULL_OUTER_JOIN_FROM_UNION.sql @@ -0,0 +1,45 @@ +/* MODULE 4 */ +/* UNION */ + +/* 1. 
Emulate a FULL OUTER JOIN with a UNION */ +DROP TABLE IF EXISTS temp.store1; +CREATE TEMP TABLE IF NOT EXISTS temp.store1 +( +costume TEXT, +quantity INT +); + +INSERT INTO temp.store1 +VALUES("tiger",6), + ("elephant",2), + ("princess", 4); + + +DROP TABLE IF EXISTS temp.store2; +CREATE TEMP TABLE IF NOT EXISTS temp.store2 +( +costume TEXT, +quantity INT +); + +INSERT INTO temp.store2 +VALUES("tiger",2), + ("dancer",7), + ("superhero", 5); + +SELECT s1.costume, s1.quantity as store1_quantity, s2.quantity as store2_quantity, 'top query' as location +FROM store1 s1 +LEFT JOIN store2 s2 + ON s1.costume = s2.costume + +UNION ALL -- allow for duplicates, because a FULL OUTER join would ALSO allow for duplicates + +SELECT s2.costume, s1.quantity , s2.quantity, 'bottom query' +FROM store2 s2 +LEFT JOIN store1 s1 + ON s1.costume = s2.costume +WHERE s1.costume IS NULL + + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_4/INTERSECT_EXCEPT.sql b/04_this_cohort/live_code/Cohort_8/module_4/INTERSECT_EXCEPT.sql new file mode 100644 index 000000000..68fa370d2 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/INTERSECT_EXCEPT.sql @@ -0,0 +1,43 @@ +/* MODULE 4 */ +/* INTERSECT & EXCEPT */ + +/* 1. Find products that have been sold (e.g. are in customer purchases AND product) */ +SELECT product_id +FROM product +INTERSECT +SELECT product_id +FROM customer_purchases; + + +/* 2. Find products that have NOT been sold (e.g. are NOT in customer purchases even though in product) */ +SELECT product_name, x.product_id + +FROM ( + SELECT product_id + FROM product + EXCEPT + SELECT product_id + FROM customer_purchases +) x +INNER JOIN product p on x.product_id = p.product_id; + +/* 3. Directions matter... if we switch the order here: +products that do not exist, because no products purchased are NOT in the product table (e.g. 
are NOT in product even though in customer purchases)*/ + +SELECT product_id +FROM customer_purchases +EXCEPT +SELECT product_id +FROM product; + + +/* 4. We can remake the intersect with a WHERE subquery for more details ... */ + +SELECT * FROM product +WHERE product_id IN ( + SELECT product_id + FROM product + INTERSECT + SELECT product_id + FROM customer_purchases +) diff --git a/04_this_cohort/live_code/Cohort_8/module_4/NTILE.sql b/04_this_cohort/live_code/Cohort_8/module_4/NTILE.sql new file mode 100644 index 000000000..fa87c2552 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/NTILE.sql @@ -0,0 +1,30 @@ +/* MODULE 4 */ +/* Windowed functions: NTILE */ + + +/* 1. Calculate quartile, quntiles, and percentiles from vendor daily sales */ +SELECT * +,NTILE(4) OVER(PARTITION BY vendor_name ORDER BY sales) as [quartile] +,NTILE(5) OVER(PARTITION BY vendor_name ORDER BY sales) as [quintile] +,NTILE(100) OVER(PARTITION BY vendor_name ORDER BY sales) as [percentile] + +FROM ( + +-- vendor daily sales + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) AS sales + + FROM customer_purchases AS cp + JOIN market_date_info AS md + ON cp.market_date = md.market_date + JOIN vendor AS v + ON v.vendor_id = cp.vendor_id + + GROUP BY cp.market_date, v.vendor_id + +) x \ No newline at end of file diff --git a/04_this_cohort/live_code/Cohort_8/module_4/NULL_management.sql b/04_this_cohort/live_code/Cohort_8/module_4/NULL_management.sql new file mode 100644 index 000000000..b304f3525 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/NULL_management.sql @@ -0,0 +1,30 @@ +/* MODULE 4 */ +/* NULL Management */ + + +/* 1. IFNULL: Missing product_size, missing product_qty_type */ + +SELECT * +,IFNULL(product_size, 'Unknown') as new_product_size +,IFNULL(product_size,product_qty_type) as both_null + +/* 2. 
Coalesce */ +,coalesce(product_size,product_qty_type) as still_both_null +,coalesce(product_size, product_qty_type, 'missing') as new_col -- if the first value is null, then the second value, if that is null, then missing + +FROM product; + + +/* 3. NULLIF +finding values in the product_size column that are "blank" strings and setting them to NULL if they are blank */ +SELECT * +,coalesce(product_size, 'Unknown') as new_product_size +,NULLIF(product_size,'') as nullif_check +,coalesce(NULLIF(product_size,''),'Unknown') as better_product_size + +/* 4. NULLIF +filtering which rows are null or blank */ + +FROM product + +WHERE NULLIF(product_size,'') IS NULL diff --git a/04_this_cohort/live_code/Cohort_8/module_4/UNION.sql b/04_this_cohort/live_code/Cohort_8/module_4/UNION.sql new file mode 100644 index 000000000..0558a4de8 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/UNION.sql @@ -0,0 +1,48 @@ +/* MODULE 4 */ +/* UNION */ + +/* 1. Find the most and least expensive product by vendor with UNION (and row_number!) 
*/ + + +SELECT +vendor_id +,product_id +,original_price +,rn_max as [row_number] + +FROM ( + + SELECT DISTINCT + vendor_id, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as rn_max + + FROM vendor_inventory + +) +where rn_max = 1 + +UNION -- UNION, 5 rows; duplicate value for vendor_id 4 product_id 16, UNION ALL returned 6 rows with the duplicate + +SELECT +vendor_id +,product_id +,original_price +,rn_min + +FROM ( + + SELECT DISTINCT + vendor_id, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price ASC) as rn_min + + FROM vendor_inventory + +) +where rn_min = 1 + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_4/module_4.sqbpro b/04_this_cohort/live_code/Cohort_8/module_4/module_4.sqbpro index b1f922c2e..72a706a62 100644 --- a/04_this_cohort/live_code/Cohort_8/module_4/module_4.sqbpro +++ b/04_this_cohort/live_code/Cohort_8/module_4/module_4.sqbpro @@ -1,23 +1,33 @@ -
/* MODULE 4 */ +/* MODULE 4 */ /* NULL Management */ /* 1. IFNULL: Missing product_size, missing product_qty_type */ +SELECT * +,IFNULL(product_size, 'Unknown') as new_product_size +,IFNULL(product_size,product_qty_type) as both_null /* 2. Coalesce */ +,coalesce(product_size,product_qty_type) as still_both_null +,coalesce(product_size, product_qty_type, 'missing') as new_col -- if the first value is null, then the second value, if that is null, then missing +FROM product; /* 3. NULLIF finding values in the product_size column that are "blank" strings and setting them to NULL if they are blank */ - - +SELECT * +,coalesce(product_size, 'Unknown') as new_product_size +,NULLIF(product_size,'') as nullif_check +,coalesce(NULLIF(product_size,''),'Unknown') as better_product_size /* 4. NULLIF filtering which rows are null or blank */ +FROM product +WHERE NULLIF(product_size,'') IS NULL /* MODULE 4 */ /* NULLIF Budget (example from the slides) */ @@ -59,12 +69,29 @@ FROM budgets https://learn.microsoft.com/en-us/sql/t-sql/language-elements/nullif-transact-sql?view=sql-server-ver17 */ -/* MODULE 4 */ +/* MODULE 4 */ /* Windowed functions: row_number */ /* 1. 
What product is the highest price per vendor */ +SELECT +x.*, product_name + +FROM ( + --inner QUERY + SELECT + vendor_id, + market_date, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as price_rank + + FROM vendor_inventory +) x +INNER JOIN product p + ON x.product_id = p.product_id +WHERE price_rank = 1 /* See how this varies from using max due to the group by @@ -77,7 +104,7 @@ GROUP BY vendor_id--,product_id */ -/* MODULE 4 */ +/* MODULE 4 */ /* Windowed functions: dense_rank, rank, row_number */ @@ -102,15 +129,37 @@ VALUES (7, 230000), (8, 100000), (9, 165000), -(10, 100000); +(10, 100000), +(11, 90000); + +SELECT * +,ROW_NUMBER() OVER(ORDER BY salary desc) as [row_number] +,RANK() OVER(ORDER BY salary desc) as [rank] +,DENSE_RANK() OVER(ORDER BY salary desc) as [dense_rank] + +FROM row_rank_dense + + + -/* MODULE 4 */ + + + + + + +/* MODULE 4 */ /* Windowed functions: NTILE */ /* 1. Calculate quartile, quntiles, and percentiles from vendor daily sales */ +SELECT * +,NTILE(4) OVER(PARTITION BY vendor_name ORDER BY sales) as [quartile] +,NTILE(5) OVER(PARTITION BY vendor_name ORDER BY sales) as [quintile] +,NTILE(100) OVER(PARTITION BY vendor_name ORDER BY sales) as [percentile] +FROM ( -- vendor daily sales SELECT @@ -127,27 +176,58 @@ VALUES JOIN vendor AS v ON v.vendor_id = cp.vendor_id - GROUP BY cp.market_date, v.vendor_id/* MODULE 4 */ + GROUP BY cp.market_date, v.vendor_id + +) x/* MODULE 4 */ /* String Manipulations */ /* 1. ltrim, rtrim, trim*/ -SELECT +SELECT DISTINCT +LTRIM(' THOMAS ROSENTHAL ') as [ltrim] +,RTRIM(' THOMAS ROSENTHAL ') as [rtrim] +,TRIM(' THOMAS ROSENTHAL ') as [trim] /* 2. replace*/ +,REPLACE('THOMAS ROSENTHAL','A','') as [not_my_name] +,customer_first_name +,REPLACE(customer_first_name,'a','') as new_customer_names +,REPLACE(REPLACE(customer_first_name,'a',''),'e','') as more_cust_names +,REPLACE(customer_first_name,'A','') as new_customer_names /* 3. upper, lower*/ /* 4. 
concat with || */ +,UPPER(customer_first_name) || ' ' || LOWER(customer_last_name) as FULL_name +,NULL || customer_first_name as [always_null] -- if you have nulls in a column with concat...you will have an issue /* 5. substr */ +,customer_first_name +,SUBSTR(customer_first_name,1,4) as first_four +,SUBSTR(customer_first_name,-5,4) as five_four /* 6. length */ +,LENGTH(customer_last_name) as last_name_length /* 7. unicode, char */ +,' + +THOMAS +ROSENTHAL + +' as linebreak +,TRIM(REPLACE(' + +THOMAS +ROSENTHAL + +',char(10), ' ')) as better_line_break +FROM customer -/* 8. REGEXP in a WHERE statement *//* MODULE 4 */ +/* 8. REGEXP in a WHERE statement */ + +WHERE customer_first_name REGEXP '(a)$' -- filtering to only end in A ... must be valid regex/* MODULE 4 */ /* Substring & instring together */ @@ -167,11 +247,54 @@ SELECT INSTR('FirstWord, SecondWord, ThirdWord',',')+1)) ,',') + INSTR('FirstWord, SecondWord, ThirdWord',',')+1) AS ThirdDelim -/* MODULE 4 */ +/* MODULE 4 */ /* UNION */ /* 1. Find the most and least expensive product by vendor with UNION (and row_number!) 
*/ + +SELECT +vendor_id +,product_id +,original_price +,rn_max as [row_number] + +FROM ( + + SELECT DISTINCT + vendor_id, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as rn_max + + FROM vendor_inventory + +) +where rn_max = 1 + +UNION -- UNION, 5 rows; duplicate value for vendor_id 4 product_id 16, UNION ALL returned 6 rows with the duplicate + +SELECT +vendor_id +,product_id +,original_price +,rn_min + +FROM ( + + SELECT DISTINCT + vendor_id, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price ASC) as rn_min + + FROM vendor_inventory + +) +where rn_min = 1 + + + /* MODULE 4 */ /* UNION */ @@ -184,9 +307,9 @@ quantity INT ); INSERT INTO temp.store1 -VALUES("tiger",6), - ("elephant",2), - ("princess", 4); +VALUES("tiger",6), + ("elephant",2), + ("princess", 4); DROP TABLE IF EXISTS temp.store2; @@ -197,25 +320,68 @@ quantity INT ); INSERT INTO temp.store2 -VALUES("tiger",2), - ("dancer",7), - ("superhero", 5);/* MODULE 4 */ +VALUES("tiger",2), + ("dancer",7), + ("superhero", 5); + +SELECT s1.costume, s1.quantity as store1_quantity, s2.quantity as store2_quantity, 'top query' as location +FROM store1 s1 +LEFT JOIN store2 s2 + ON s1.costume = s2.costume + +UNION ALL -- allow for duplicates, because a FULL OUTER join would ALSO allow for duplicates + +SELECT s2.costume, s1.quantity , s2.quantity, 'bottom query' +FROM store2 s2 +LEFT JOIN store1 s1 + ON s1.costume = s2.costume +WHERE s1.costume IS NULL + + +/* MODULE 4 */ /* INTERSECT & EXCEPT */ /* 1. Find products that have been sold (e.g. are in customer purchases AND product) */ - +SELECT product_id +FROM product +INTERSECT +SELECT product_id +FROM customer_purchases; /* 2. Find products that have NOT been sold (e.g. 
are NOT in customer purchases even though in product) */ +SELECT product_name, x.product_id - +FROM ( + SELECT product_id + FROM product + EXCEPT + SELECT product_id + FROM customer_purchases +) x +INNER JOIN product p on x.product_id = p.product_id; /* 3. Directions matter... if we switch the order here: products that do not exist, because no products purchased are NOT in the product table (e.g. are NOT in product even though in customer purchases)*/ +SELECT product_id +FROM customer_purchases +EXCEPT +SELECT product_id +FROM product; /* 4. We can remake the intersect with a WHERE subquery for more details ... */ +SELECT * FROM product +WHERE product_id IN ( + SELECT product_id + FROM product + INTERSECT + SELECT product_id + FROM customer_purchases +) + + - + \ No newline at end of file diff --git a/04_this_cohort/live_code/Cohort_8/module_4/nullif_budget.sql b/04_this_cohort/live_code/Cohort_8/module_4/nullif_budget.sql new file mode 100644 index 000000000..f9cdc7914 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/nullif_budget.sql @@ -0,0 +1,41 @@ +/* MODULE 4 */ +/* NULLIF Budget (example from the slides) */ + +/* The following example creates a budgets table to show a department (dept) +...its current budget (current_year) and its previous budget (previous_year). + +For the current year, NULL is used for departments with budgets that have not changed from the previous year, +and 0 is used for budgets that have not yet been determined. + +To find out the average of only those departments that receive a budget and to include the budget value +from the previous year (use the previous_year value, where the current_year is NULL), +combine the NULLIF and COALESCE functions. 
*/ + +DROP TABLE IF EXISTS temp.budgets; +CREATE TEMP TABLE IF NOT EXISTS temp.budgets ( +dept STRING +,current_year INT +,previous_year INT +); + + +INSERT INTO temp.budgets VALUES +('software',1000,1000) +, ('candles',NULL,500) +, ('coffee', 400, 200) +, ('pencils',0, 50); + + +/*examine each of these columns */ +SELECT +NULLIF(current_year, previous_year) +--,NULLIF(COALESCE(current_year, previous_year), 0.00) +--, +--AVG(NULLIF(COALESCE(current_year, previous_year), 0.00)) +FROM budgets + + +/* more NULLIF here: +https://learn.microsoft.com/en-us/sql/t-sql/language-elements/nullif-transact-sql?view=sql-server-ver17 +*/ + diff --git a/04_this_cohort/live_code/Cohort_8/module_4/row_number.sql b/04_this_cohort/live_code/Cohort_8/module_4/row_number.sql new file mode 100644 index 000000000..fbbb5fa47 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/row_number.sql @@ -0,0 +1,35 @@ +/* MODULE 4 */ +/* Windowed functions: row_number */ + + +/* 1. What product is the highest price per vendor */ +SELECT +x.*, product_name + +FROM ( + --inner QUERY + SELECT + vendor_id, + market_date, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as price_rank + + FROM vendor_inventory +) x +INNER JOIN product p + ON x.product_id = p.product_id + +WHERE price_rank = 1 + + +/* See how this varies from using max due to the group by +SELECT vendor_id, +--product_id, +MAX(original_price) + +FROM vendor_inventory +GROUP BY vendor_id--,product_id + +*/ + diff --git a/04_this_cohort/live_code/Cohort_8/module_4/row_rank_dense.sql b/04_this_cohort/live_code/Cohort_8/module_4/row_rank_dense.sql new file mode 100644 index 000000000..923ae1bbf --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/row_rank_dense.sql @@ -0,0 +1,45 @@ +/* MODULE 4 */ +/* Windowed functions: dense_rank, rank, row_number */ + + +/* 1. 
Compare dense_rank, rank, and row_number */ + +DROP TABLE IF EXISTS TEMP.row_rank_dense; + +CREATE TEMP TABLE IF NOT EXISTS TEMP.row_rank_dense +( +emp_id INT, +salary INT +); + +INSERT INTO temp.row_rank_dense +VALUES +(1,200000), +(2,200000), +(3, 160000), +(4, 120000), +(5, 125000), +(6, 165000), +(7, 230000), +(8, 100000), +(9, 165000), +(10, 100000), +(11, 90000); + +SELECT * +,ROW_NUMBER() OVER(ORDER BY salary desc) as [row_number] +,RANK() OVER(ORDER BY salary desc) as [rank] +,DENSE_RANK() OVER(ORDER BY salary desc) as [dense_rank] + +FROM row_rank_dense + + + + + + + + + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_4/string_manipulations.sql b/04_this_cohort/live_code/Cohort_8/module_4/string_manipulations.sql new file mode 100644 index 000000000..171bc3390 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/string_manipulations.sql @@ -0,0 +1,50 @@ +/* MODULE 4 */ +/* String Manipulations */ + + +/* 1. ltrim, rtrim, trim*/ +SELECT DISTINCT +LTRIM(' THOMAS ROSENTHAL ') as [ltrim] +,RTRIM(' THOMAS ROSENTHAL ') as [rtrim] +,TRIM(' THOMAS ROSENTHAL ') as [trim] + +/* 2. replace*/ +,REPLACE('THOMAS ROSENTHAL','A','') as [not_my_name] +,customer_first_name +,REPLACE(customer_first_name,'a','') as new_customer_names +,REPLACE(REPLACE(customer_first_name,'a',''),'e','') as more_cust_names +,REPLACE(customer_first_name,'A','') as new_customer_names + +/* 3. upper, lower*/ + +/* 4. concat with || */ +,UPPER(customer_first_name) || ' ' || LOWER(customer_last_name) as FULL_name +,NULL || customer_first_name as [always_null] -- if you have nulls in a column with concat...you will have an issue + +/* 5. substr */ +,customer_first_name +,SUBSTR(customer_first_name,1,4) as first_four +,SUBSTR(customer_first_name,-5,4) as five_four + +/* 6. length */ +,LENGTH(customer_last_name) as last_name_length + +/* 7. 
unicode, char */ +,' + +THOMAS +ROSENTHAL + +' as linebreak +,TRIM(REPLACE(' + +THOMAS +ROSENTHAL + +',char(10), ' ')) as better_line_break + +FROM customer + +/* 8. REGEXP in a WHERE statement */ + +WHERE customer_first_name REGEXP '(a)$' -- filtering to only end in A ... must be valid regex \ No newline at end of file diff --git a/04_this_cohort/live_code/Cohort_8/module_4/substr_instr_combination_query.sql b/04_this_cohort/live_code/Cohort_8/module_4/substr_instr_combination_query.sql new file mode 100644 index 000000000..559d58e59 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_4/substr_instr_combination_query.sql @@ -0,0 +1,20 @@ +/* MODULE 4 */ +/* Substring & instring together */ + + +/* think of this as a comma delimiter ... but it's a bit silly ... do this in python/R instead unless you have to */ + +SELECT +'FirstWord, SecondWord, ThirdWord', + SUBSTR('FirstWord, SecondWord, ThirdWord',0, INSTR('FirstWord, SecondWord, ThirdWord',',')) as FirstDelim + --,SUBSTR('FirstWord, SecondWord, ThirdWord',0, 10) as FirstDelim -- same thing but not dynamic + ,SUBSTR('FirstWord, SecondWord, ThirdWord', + INSTR('FirstWord, SecondWord, ThirdWord',',')+1, + INSTR('FirstWord, SecondWord, ThirdWord',',')+1) as SecondDelim + + ,SUBSTR('FirstWord, SecondWord, ThirdWord', + INSTR( + (SUBSTR('FirstWord, SecondWord, ThirdWord', + INSTR('FirstWord, SecondWord, ThirdWord',',')+1)) + ,',') + + INSTR('FirstWord, SecondWord, ThirdWord',',')+1) AS ThirdDelim diff --git a/04_this_cohort/live_code/Cohort_8/module_5/CROSS_JOIN.sql b/04_this_cohort/live_code/Cohort_8/module_5/CROSS_JOIN.sql new file mode 100644 index 000000000..3fab373ff --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/CROSS_JOIN.sql @@ -0,0 +1,23 @@ +/* MODULE 5 */ +/* CROSS JOIN */ + + +/* 1. 
CROSS JOIN sizes with product*/ + +DROP TABLE IF EXISTS TEMP.sizes; +CREATE TEMP TABLE IF NOT EXISTS TEMP.sizes (size TEXT); + +INSERT INTO TEMP.sizes +VALUES('small'), +('medium'), +('large'); + +SELECT * FROM TEMP.sizes; + + +SELECT product_id, product_name, size +FROM temp.sizes +CROSS JOIN product + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_5/INSERT_UPDATE_DELETE.sql b/04_this_cohort/live_code/Cohort_8/module_5/INSERT_UPDATE_DELETE.sql new file mode 100644 index 000000000..9910e0fc6 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/INSERT_UPDATE_DELETE.sql @@ -0,0 +1,36 @@ +/* MODULE 5 */ +/* INSERT UPDATE DELETE */ + + +DROP TABLE IF EXISTS temp.product_expanded; +CREATE TEMP TABLE product_expanded AS + SELECT * FROM product; + +--SELECT * FROM product_expanded + +/* 1. add a product to the temp table */ +--INSERT +INSERT INTO product_expanded +VALUES(24, 'Almonds', '1 lbs', 3, 'lbs'); + +SELECT * FROM product_expanded; + +/* 2. change the product_size for THAT product */ +--UPDATE +--change the product_size for almonds to 1/2 kg +UPDATE product_expanded +SET product_size = '1/2 kg', product_qty_type = 'kg' +--SELECT * FROM product_expanded +WHERE product_id = 24; + +SELECT * FROM product_expanded; + +/* 3. 
delete the newly added product */ +--DELETE +DELETE FROM product_expanded +--SELECT * FROM product_expanded -- write this first, it can help you to determine you are looking at the right rows before deleting +WHERE product_id = 24; + +SELECT * FROM product_expanded; + + diff --git a/04_this_cohort/live_code/Cohort_8/module_5/JSON_TO_TABLE.sql b/04_this_cohort/live_code/Cohort_8/module_5/JSON_TO_TABLE.sql new file mode 100644 index 000000000..b15f47c7d --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/JSON_TO_TABLE.sql @@ -0,0 +1,35 @@ +--JSON to a TABLE + +--create a temp TABLE +--insert the json as a long string +--write a json_each statement +--use the json_each statement as a subquery to extract our column values +-- now we have a table! + +DROP TABLE IF EXISTS temp.[new_json]; +CREATE TEMP TABLE IF NOT EXISTS temp.new_json +( +the_json BLOB -- the column and the column type +); + +INSERT INTO temp.new_json +VALUES( +'[ + { + "country": "Afghanistan", + "city": "Kabul" + }, + { + "country": "Albania", + "city": "Tirana" + }]' + ); + +SELECT key +,JSON_EXTRACT(value,'$.country') as country +,JSON_EXTRACT(value,'$.city') as city + +FROM ( + SELECT * + FROM new_json,JSON_EACH(new_json.col1, '$') + ) x diff --git a/04_this_cohort/live_code/Cohort_8/module_5/SELF_JOIN.sql b/04_this_cohort/live_code/Cohort_8/module_5/SELF_JOIN.sql new file mode 100644 index 000000000..636c5ec25 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/SELF_JOIN.sql @@ -0,0 +1,26 @@ +/* MODULE 5 */ +/* SELF JOIN */ + + +/* 1. 
Create a self-joining hierarchy */ + +DROP TABLE IF EXISTS TEMP.employees; +CREATE TEMP TABLE TEMP.employees +( +emp_id INT +,emp_name text +,mgr_id INT +); + +INSERT INTO TEMP.employees +VALUES(1,'Thomas',3) +,(2,'Niyaz', 4) +,(3,'Rohan', NULL) +,(4, 'Jennie',3); + +SELECT * FROM TEMP.employees; + +SELECT e.emp_name,m.emp_name as mgr_name +FROM temp.employees e +LEFT JOIN temp.employees m + ON e.mgr_id = m.emp_id diff --git a/04_this_cohort/live_code/Cohort_8/module_5/make_the_view_dynamic.sql b/04_this_cohort/live_code/Cohort_8/module_5/make_the_view_dynamic.sql new file mode 100644 index 000000000..3ca8b0116 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/make_the_view_dynamic.sql @@ -0,0 +1,54 @@ +/* MODULE 5 */ +/* DYNAMIC VIEW */ + +DROP VIEW IF EXISTS todays_vendor_daily_sales; +CREATE VIEW IF NOT EXISTS todays_vendor_daily_sales AS + + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md -- days the market is open ... 
+ INNER JOIN ( + SELECT * FROM customer_purchases + UNION + SELECT * FROM new_customer_purchases + ) cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + WHERE md.market_date = DATE('now','localtime') -- "today", if the timezone is not to localtime + + GROUP BY cp.market_date, v.vendor_id; + + + + + + +/* spoilers below */ + + + + + + + + +-- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING +-- 1) update new_customer_purchases to today +-- 2) add the union +-- 3) add the where statement +-- 4) update the market_date_info to include today + +SELECT * FROM todays_vendor_daily_sales + + + + diff --git a/04_this_cohort/live_code/Cohort_8/module_5/module_5.sqbpro b/04_this_cohort/live_code/Cohort_8/module_5/module_5.sqbpro index f608313e0..2e3e8c29d 100644 --- a/04_this_cohort/live_code/Cohort_8/module_5/module_5.sqbpro +++ b/04_this_cohort/live_code/Cohort_8/module_5/module_5.sqbpro @@ -1,4 +1,4 @@ -
/* MODULE 5 */ +/* MODULE 5 */ /* INSERT UPDATE DELETE */ @@ -9,18 +9,39 @@ CREATE TEMP TABLE product_expanded AS --SELECT * FROM product_expanded /* 1. add a product to the temp table */ +--INSERT +INSERT INTO product_expanded +VALUES(24, 'Almonds', '1 lbs', 3, 'lbs'); - +SELECT * FROM product_expanded; /* 2. change the product_size for THAT product */ +--UPDATE +--change the product_size for almonds to 1/2 kg +UPDATE product_expanded +SET product_size = '1/2 kg', product_qty_type = 'kg' +--SELECT * FROM product_expanded +WHERE product_id = 24; + +SELECT * FROM product_expanded; +/* 3. delete the newly added product */ +--DELETE +DELETE FROM product_expanded +--SELECT * FROM product_expanded -- write this first, it can help you to determine you are looking at the right rows before deleting +WHERE product_id = 24; +SELECT * FROM product_expanded; -/* 3. delete the newly added product *//* MODULE 5 */ + +/* MODULE 5 */ /* VIEW */ /* 1. Create a vendor daily sales view */ +DROP VIEW IF EXISTS vendor_daily_sales; +CREATE VIEW IF NOT EXISTS vendor_daily_sales AS + SELECT md.market_date ,market_day @@ -37,41 +58,55 @@ CREATE TEMP TABLE product_expanded AS ON cp.vendor_id = v.vendor_id GROUP BY cp.market_date, v.vendor_id; + +SELECT * FROM vendor_daily_sales -/* MODULE 5 */ +/* MODULE 5 */ /* VIEW in another query */ /* 1. Transform the daily sales view into a sales by vendor per week result */ +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) as sales +FROM vendor_daily_sales -/* MODULE 5 */ -/* UPDATE statements for view */ - - -/* 1. SET market_date equal to today for new_customer_purchases */ - - +GROUP BY +market_date +,market_week +,vendor_name - -/* 2. Add today's info to the market_date_info - -we need to add -1. today's date -2. today's day -3. today's week number -4. 
today's year - -INSERT INTO market_date_info -VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); - -*/ - -/* MODULE 5 */ +/* MODULE 5 */ /* DYNAMIC VIEW */ +DROP VIEW IF EXISTS todays_vendor_daily_sales; +CREATE VIEW IF NOT EXISTS todays_vendor_daily_sales AS - + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md -- days the market is open ... + INNER JOIN ( + SELECT * FROM customer_purchases + UNION + SELECT * FROM new_customer_purchases + ) cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + WHERE md.market_date = DATE('now','localtime') -- "today", if the timezone is not to localtime + + GROUP BY cp.market_date, v.vendor_id; @@ -87,25 +122,46 @@ VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Su +-- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING +-- 1) update new_customer_purchases to today +-- 2) add the union +-- 3) add the where statement +-- 4) update the market_date_info to include today +SELECT * FROM todays_vendor_daily_sales +/* MODULE 5 */ +/* UPDATE statements for view */ +/* 1. SET market_date equal to today for new_customer_purchases */ +UPDATE new_customer_purchases +SET market_date = '2025-11-19'; +SELECT * FROM new_customer_purchases --- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING --- 1) update new_customer_purchases to today --- 2) add the union --- 3) add the where statement --- 4) update the market_date_info to include today +/* 2. Add today's info to the market_date_info +we need to add +1. today's date +2. today's day +3. today's week number +4. 
today's year -/* MODULE 5 */ +INSERT INTO market_date_info +VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); + +*/ + +INSERT INTO market_date_info +VALUES('2025-11-19','Wednesday','47','2025','8:00 AM','2:00 PM','nothing interesting','Winter','1','-6',0,0); + +/* MODULE 5 */ /* CROSS JOIN */ @@ -122,8 +178,13 @@ VALUES('small'), SELECT * FROM TEMP.sizes; +SELECT product_id, product_name, size +FROM temp.sizes +CROSS JOIN product + -/* MODULE 5 */ + +/* MODULE 5 */ /* SELF JOIN */ @@ -144,4 +205,9 @@ VALUES(1,'Thomas',3) ,(4, 'Jennie',3); SELECT * FROM TEMP.employees; - + +SELECT e.emp_name,m.emp_name as mgr_name +FROM temp.employees e +LEFT JOIN temp.employees m + ON e.mgr_id = m.emp_id + diff --git a/04_this_cohort/live_code/Cohort_8/module_5/update_statements_for_view.sql b/04_this_cohort/live_code/Cohort_8/module_5/update_statements_for_view.sql new file mode 100644 index 000000000..87be95b80 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/update_statements_for_view.sql @@ -0,0 +1,28 @@ +/* MODULE 5 */ +/* UPDATE statements for view */ + + +/* 1. SET market_date equal to today for new_customer_purchases */ +UPDATE new_customer_purchases +SET market_date = '2025-11-19'; + +SELECT * FROM new_customer_purchases + + + +/* 2. Add today's info to the market_date_info + +we need to add +1. today's date +2. today's day +3. today's week number +4. 
today's year + +INSERT INTO market_date_info +VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); + +*/ + +INSERT INTO market_date_info +VALUES('2025-11-19','Wednesday','47','2025','8:00 AM','2:00 PM','nothing interesting','Winter','1','-6',0,0); + diff --git a/04_this_cohort/live_code/Cohort_8/module_5/vendor_daily_sales_view.sql b/04_this_cohort/live_code/Cohort_8/module_5/vendor_daily_sales_view.sql new file mode 100644 index 000000000..c0b6bc6d8 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/vendor_daily_sales_view.sql @@ -0,0 +1,27 @@ +/* MODULE 5 */ +/* VIEW */ + +/* 1. Create a vendor daily sales view */ + +DROP VIEW IF EXISTS vendor_daily_sales; +CREATE VIEW IF NOT EXISTS vendor_daily_sales AS + + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md + INNER JOIN customer_purchases cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + GROUP BY cp.market_date, v.vendor_id; + +SELECT * FROM vendor_daily_sales + diff --git a/04_this_cohort/live_code/Cohort_8/module_5/view_in_another_query.sql b/04_this_cohort/live_code/Cohort_8/module_5/view_in_another_query.sql new file mode 100644 index 000000000..aaf9b1b6a --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_5/view_in_another_query.sql @@ -0,0 +1,18 @@ +/* MODULE 5 */ +/* VIEW in another query */ + +/* 1. 
Transform the daily sales view into a sales by vendor per week result */ + +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) as sales + +FROM vendor_daily_sales + +GROUP BY +market_date +,market_week +,vendor_name + diff --git a/04_this_cohort/live_code/Cohort_8/module_6/1nf.sql b/04_this_cohort/live_code/Cohort_8/module_6/1nf.sql new file mode 100644 index 000000000..f57239761 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_6/1nf.sql @@ -0,0 +1,18 @@ +--1nf +drop table if exists temp.hold; +CREATE TABLE temp.hold AS +SELECT DISTINCT +name, +OS, +SUBSTR(software, 1, INSTR(software,',')-1) AS s1, +SUBSTR(software,INSTR(software,',')+1, INSTR(SUBSTR(software, INSTR(software, ',')+1),',')-1) as s2, +SUBSTR(software,INSTR(SUBSTR(software,INSTR(software,',')+1),',')+INSTR(software,',')+1) as s3, +supervisor + +FROM skills; + +SELECT name,OS,s1 as software, supervisor FROM hold +UNION +SELECT name,OS,s2 as software, supervisor FROM hold +UNION +SELECT name,OS,s3 as software, supervisor FROM hold \ No newline at end of file diff --git a/04_this_cohort/live_code/Cohort_8/module_6/2nf.sql b/04_this_cohort/live_code/Cohort_8/module_6/2nf.sql new file mode 100644 index 000000000..234d98d9a --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_6/2nf.sql @@ -0,0 +1,52 @@ +-- 2nf +drop table if exists temp.student; +drop table if exists temp.supervisor; +drop table if exists temp.student_software; + +create temp table if not exists temp.supervisor +( +id INTEGER PRIMARY KEY AUTOINCREMENT, +name TEXT +); + +INSERT INTO temp.supervisor(name) +select distinct supervisor +from skills; + +create temp table if not exists temp.student +( +id INTEGER PRIMARY KEY AUTOINCREMENT, +name TEXT, +OS TEXT, +supervisor_id INTEGER, +CONSTRAINT "fk_supervisor_id" FOREIGN KEY ("supervisor_id") REFERENCES "supervisor" ("id") +) + +INSERT INTO student(name, OS, supervisor_id) +SELECT DISTINCT +h.name +,OS +,s.id AS supervisor_id + +FROM hold h +JOIN supervisor s + on 
h.supervisor = s.name + +CREATE TABLE temp.student_software AS +SELECT id, software + +FROM student s +JOIN ( + SELECT name,OS,s1 as software, supervisor FROM hold + UNION + SELECT name,OS,s2 as software, supervisor FROM hold + UNION + SELECT name,OS,s3 as software, supervisor FROM hold +) u +ON s.name = u.name + +--select * from student +--select * from supervisor +select * from student_software + + diff --git a/04_this_cohort/live_code/Cohort_8/module_6/3nf.sql b/04_this_cohort/live_code/Cohort_8/module_6/3nf.sql new file mode 100644 index 000000000..d5f037013 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_6/3nf.sql @@ -0,0 +1,37 @@ +--3nf +drop table if exists temp.OS; +drop table if exists temp.software; +create temp table if not exists temp.OS +( +OS_id INTEGER, +OS TEXT, +win_only TEXT +); + +insert into temp.OS +values(1,"win","TRUE"), + (2,"mac","FALSE"); + + +create temp table if not exists temp.software +( +software_id INTEGER PRIMARY KEY AUTOINCREMENT, +software TEXT, +win_only TEXT +); + +INSERT INTO temp.software(software, win_only) +SELECT DISTINCT software, win_only +FROM student_software s +CROSS JOIN ( + SELECT * FROM OS WHERE OS = 'mac' +); + +UPDATE software +SET win_only = 'TRUE' +WHERE software.software = ' MSSQL'; + +SELECT * FROM OS +--SELECT * FROM software + + diff --git a/04_this_cohort/live_code/Cohort_8/module_6/SQLite_and_python.ipynb b/04_this_cohort/live_code/Cohort_8/module_6/SQLite_and_python.ipynb new file mode 100644 index 000000000..9fddb799a --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_6/SQLite_and_python.ipynb @@ -0,0 +1,717 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05e1dbf0", + "metadata": {}, + "source": [ + "# Connect to FarmersMarket.db" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f1d8cb62", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "#set your location, slash direction will change for windows 
and mac\n", + "DB = '/Users/thomas/Documents/GitHub/02-intro_sql/05_src/sql/farmersmarket.db' \n", + "#establish your connection\n", + "conn = sqlite3.connect(DB, isolation_level=None,\n", + " detect_types=sqlite3.PARSE_COLNAMES)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1204e343", + "metadata": {}, + "outputs": [], + "source": [ + "#run your query, use \"\\\" to allow line breaks\n", + "db_df = pd.read_sql_query(\"SELECT p.*,pc.product_category_name \\\n", + " FROM product p \\\n", + " JOIN product_category pc \\\n", + " ON p.product_category_id = pc.product_category_id\"\n", + " ,conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "5c7863ee-08cd-4095-b80a-61f82425bd2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
product_idproduct_nameproduct_sizeproduct_category_idproduct_qty_typeproduct_category_name
01Habanero Peppers - Organicmedium1lbsFresh Fruits & Vegetables
12Jalapeno Peppers - Organicsmall1lbsFresh Fruits & Vegetables
23Poblano Peppers - Organiclarge1unitFresh Fruits & Vegetables
34Banana Peppers - Jar8 oz3unitPackaged Prepared Food
45Whole Wheat Bread1.5 lbs3unitPackaged Prepared Food
56Cut Zinnias Bouquetmedium5unitPlants & Flowers
67Apple Pie10\"3unitPackaged Prepared Food
79Sweet Potatoesmedium1lbsFresh Fruits & Vegetables
810Eggs1 dozen6unitEggs & Meat (Fresh or Frozen)
911Pork Chops1 lb6lbsEggs & Meat (Fresh or Frozen)
1012Baby Salad Lettuce Mix - Bag1/2 lb1unitFresh Fruits & Vegetables
1113Baby Salad Lettuce Mix1 lb1lbsFresh Fruits & Vegetables
1214Red PotatoesNone1NoneFresh Fruits & Vegetables
1315Red Potatoes - Small1NoneFresh Fruits & Vegetables
1416Sweet CornEar1unitFresh Fruits & Vegetables
1517Carrotssold by weight1lbsFresh Fruits & Vegetables
1618Carrots - Organicbunch1unitFresh Fruits & Vegetables
1719Farmer's Market Resuable Shopping Bagmedium7unitNon-Edible Products
1820Homemade Beeswax Candles6\"7unitNon-Edible Products
1921Organic Cherry Tomatoespint1unitFresh Fruits & Vegetables
2022Roma Tomatoesmedium1lbsFresh Fruits & Vegetables
2123Maple Syrup - Jar8 oz2unitPackaged Pantry Goods
228Cherry Pie10\"3unitPackaged Prepared Food
\n", + "" + ], + "text/plain": [ + " product_id product_name product_size \\\n", + "0 1 Habanero Peppers - Organic medium \n", + "1 2 Jalapeno Peppers - Organic small \n", + "2 3 Poblano Peppers - Organic large \n", + "3 4 Banana Peppers - Jar 8 oz \n", + "4 5 Whole Wheat Bread 1.5 lbs \n", + "5 6 Cut Zinnias Bouquet medium \n", + "6 7 Apple Pie 10\" \n", + "7 9 Sweet Potatoes medium \n", + "8 10 Eggs 1 dozen \n", + "9 11 Pork Chops 1 lb \n", + "10 12 Baby Salad Lettuce Mix - Bag 1/2 lb \n", + "11 13 Baby Salad Lettuce Mix 1 lb \n", + "12 14 Red Potatoes None \n", + "13 15 Red Potatoes - Small \n", + "14 16 Sweet Corn Ear \n", + "15 17 Carrots sold by weight \n", + "16 18 Carrots - Organic bunch \n", + "17 19 Farmer's Market Resuable Shopping Bag medium \n", + "18 20 Homemade Beeswax Candles 6\" \n", + "19 21 Organic Cherry Tomatoes pint \n", + "20 22 Roma Tomatoes medium \n", + "21 23 Maple Syrup - Jar 8 oz \n", + "22 8 Cherry Pie 10\" \n", + "\n", + " product_category_id product_qty_type product_category_name \n", + "0 1 lbs Fresh Fruits & Vegetables \n", + "1 1 lbs Fresh Fruits & Vegetables \n", + "2 1 unit Fresh Fruits & Vegetables \n", + "3 3 unit Packaged Prepared Food \n", + "4 3 unit Packaged Prepared Food \n", + "5 5 unit Plants & Flowers \n", + "6 3 unit Packaged Prepared Food \n", + "7 1 lbs Fresh Fruits & Vegetables \n", + "8 6 unit Eggs & Meat (Fresh or Frozen) \n", + "9 6 lbs Eggs & Meat (Fresh or Frozen) \n", + "10 1 unit Fresh Fruits & Vegetables \n", + "11 1 lbs Fresh Fruits & Vegetables \n", + "12 1 None Fresh Fruits & Vegetables \n", + "13 1 None Fresh Fruits & Vegetables \n", + "14 1 unit Fresh Fruits & Vegetables \n", + "15 1 lbs Fresh Fruits & Vegetables \n", + "16 1 unit Fresh Fruits & Vegetables \n", + "17 7 unit Non-Edible Products \n", + "18 7 unit Non-Edible Products \n", + "19 1 unit Fresh Fruits & Vegetables \n", + "20 1 lbs Fresh Fruits & Vegetables \n", + "21 2 unit Packaged Pantry Goods \n", + "22 3 unit Packaged Prepared Food " + ] 
+ }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db_df" + ] + }, + { + "cell_type": "markdown", + "id": "8b7c36c0", + "metadata": {}, + "source": [ + "Export the query:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ee17555e", + "metadata": {}, + "outputs": [], + "source": [ + "#save\n", + "db_df.to_csv('database-py.CSV', index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "ed14b573", + "metadata": {}, + "source": [ + "# Run a SQL query with pandasql" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ac82fb05", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install pandasql" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4f783bd4", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pandasql as sql #this allows us to run SQLite queries!\n", + "p = \"https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv\"\n", + "penguins = pd.read_csv(p) #create a dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "7892f454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
0AdelieTorgersen39.118.7181.03750.0male2007
1AdelieTorgersen39.517.4186.03800.0female2007
2AdelieTorgersen40.318.0195.03250.0female2007
3AdelieTorgersenNaNNaNNaNNaNNaN2007
4AdelieTorgersen36.719.3193.03450.0female2007
...........................
339ChinstrapDream55.819.8207.04000.0male2009
340ChinstrapDream43.518.1202.03400.0female2009
341ChinstrapDream49.618.2193.03775.0male2009
342ChinstrapDream50.819.0210.04100.0male2009
343ChinstrapDream50.218.7198.03775.0female2009
\n", + "

344 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", + "0 Adelie Torgersen 39.1 18.7 181.0 \n", + "1 Adelie Torgersen 39.5 17.4 186.0 \n", + "2 Adelie Torgersen 40.3 18.0 195.0 \n", + "3 Adelie Torgersen NaN NaN NaN \n", + "4 Adelie Torgersen 36.7 19.3 193.0 \n", + ".. ... ... ... ... ... \n", + "339 Chinstrap Dream 55.8 19.8 207.0 \n", + "340 Chinstrap Dream 43.5 18.1 202.0 \n", + "341 Chinstrap Dream 49.6 18.2 193.0 \n", + "342 Chinstrap Dream 50.8 19.0 210.0 \n", + "343 Chinstrap Dream 50.2 18.7 198.0 \n", + "\n", + " body_mass_g sex year \n", + "0 3750.0 male 2007 \n", + "1 3800.0 female 2007 \n", + "2 3250.0 female 2007 \n", + "3 NaN NaN 2007 \n", + "4 3450.0 female 2007 \n", + ".. ... ... ... \n", + "339 4000.0 male 2009 \n", + "340 3400.0 female 2009 \n", + "341 3775.0 male 2009 \n", + "342 4100.0 male 2009 \n", + "343 3775.0 female 2009 \n", + "\n", + "[344 rows x 8 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "penguins" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8036d336", + "metadata": {}, + "outputs": [], + "source": [ + "yrly_penguins = sql.sqldf('''SELECT DISTINCT year, COUNT(*) AS count, \n", + " SUM(COUNT(*)) OVER (ORDER BY year) AS running_total\n", + " FROM penguins\n", + " GROUP BY year''') #run a SQLite query with sqldf()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "80fd4dd6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearcountrunning_total
02007110110
12008114224
22009120344
\n", + "
" + ], + "text/plain": [ + " year count running_total\n", + "0 2007 110 110\n", + "1 2008 114 224\n", + "2 2009 120 344" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "yrly_penguins" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd3de3f-fb4f-46ac-ad42-23971226e5d0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/04_this_cohort/live_code/Cohort_8/module_6/denormalized.sql b/04_this_cohort/live_code/Cohort_8/module_6/denormalized.sql new file mode 100644 index 000000000..da4208587 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_6/denormalized.sql @@ -0,0 +1,14 @@ +-- normal forms creation + +drop table if exists temp.skills; +create temp table if not exists temp.skills +( +name TEXT, +OS TEXT, +software TEXT, +supervisor TEXT +); + +insert into temp.skills +values("A","win","VSCode, MSSQL, RStudio", "Eric Yu"), + ("Thomas","mac", "Spyder, SQLite, RStudio", "Rohan Alexander"); diff --git a/04_this_cohort/live_code/Cohort_8/module_6/penguins_in_python_sql.sql b/04_this_cohort/live_code/Cohort_8/module_6/penguins_in_python_sql.sql new file mode 100644 index 000000000..2326c1d29 --- /dev/null +++ b/04_this_cohort/live_code/Cohort_8/module_6/penguins_in_python_sql.sql @@ -0,0 +1,9 @@ +select * from penguins; + +-- how many penguins were identified each year +SELECT DISTINCT year +,COUNT(*) AS count +,SUM(COUNT(*)) OVER (ORDER BY year) AS running_total + + FROM penguins +GROUP BY year \ No newline at end of file diff --git 
a/04_this_cohort/live_code/DC/module_4/FULL_OUTER_JOIN_WITH_UNION.sql b/04_this_cohort/live_code/DC/module_4/FULL_OUTER_JOIN_WITH_UNION.sql new file mode 100644 index 000000000..5f07038bd --- /dev/null +++ b/04_this_cohort/live_code/DC/module_4/FULL_OUTER_JOIN_WITH_UNION.sql @@ -0,0 +1,44 @@ +/* MODULE 4 */ +/* UNION */ + +/* 1. Emulate a FULL OUTER JOIN with a UNION */ +DROP TABLE IF EXISTS temp.store1; +CREATE TEMP TABLE IF NOT EXISTS temp.store1 +( +costume TEXT, +quantity INT +); + +INSERT INTO temp.store1 +VALUES("tiger",6), + ("elephant",2), + ("princess", 4); + + +DROP TABLE IF EXISTS temp.store2; +CREATE TEMP TABLE IF NOT EXISTS temp.store2 +( +costume TEXT, +quantity INT +); + +INSERT INTO temp.store2 +VALUES("tiger",2), + ("dancer",7), + ("superhero", 5); + +SELECT s1.costume, s1.quantity as store1_quantity, s2.quantity as store2_quantity +FROM store1 s1 +LEFT JOIN store2 s2 + ON s1.costume = s2.costume + +UNION ALL + +SELECT s2.costume,s1.quantity,s2.quantity +FROM store2 s2 +LEFT JOIN store1 s1 + ON s1.costume = s2.costume +WHERE s1.costume IS NULL + +ORDER BY s1.quantity DESC, s2.quantity + \ No newline at end of file diff --git a/04_this_cohort/live_code/DC/module_4/INTERSECT_EXCEPT.sql b/04_this_cohort/live_code/DC/module_4/INTERSECT_EXCEPT.sql new file mode 100644 index 000000000..7b7afe9b9 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_4/INTERSECT_EXCEPT.sql @@ -0,0 +1,50 @@ +/* MODULE 4 */ +/* INTERSECT & EXCEPT */ + +/* 1. Find products that have been sold (e.g. are in customer purchases AND product) */ +--direction does not matter + +SELECT product_id +FROM customer_purchases +INTERSECT +SELECT product_id +FROM product; + + +/* 2. Find products that have NOT been sold (e.g. 
are NOT in customer purchases even though in product) */ +--direction matters +SELECT x.product_id, product_name +FROM ( + SELECT product_id + FROM product + EXCEPT + SELECT product_id + FROM customer_purchases +) x +JOIN product p on x.product_id = p.product_id; + + +/* 3. Directions matter... if we switch the order here: +products that do not exist, because no products purchased are NOT in the product table (e.g. are NOT in product even though in customer purchases)*/ + +--returning 0 rows +SELECT product_id +FROM customer_purchases +EXCEPT +SELECT product_id +FROM product; + + +/* 4. We can remake the intersect with a WHERE subquery for more details ... */ + +SELECT * +FROM product +WHERE product_id IN + ( + SELECT product_id + FROM customer_purchases + INTERSECT + SELECT product_id + FROM product + ) + diff --git a/04_this_cohort/live_code/DC/module_4/UNION_UNION_ALL.sql b/04_this_cohort/live_code/DC/module_4/UNION_UNION_ALL.sql new file mode 100644 index 000000000..7fc8ba8d2 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_4/UNION_UNION_ALL.sql @@ -0,0 +1,32 @@ +/* MODULE 4 */ +/* UNION */ + +/* 1. Find the most and least expensive product by vendor with UNION (and row_number!) 
*/ + +SELECT vendor_id, product_id, original_price, rn_max as [row_number] +FROM ( + + SELECT + vendor_id, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as rn_max + + FROM vendor_inventory +) +WHERE rn_max = 1 + +UNION --union returned 5 rows....union all returned 6 rows (vendor #4 was duplicated) + +SELECT vendor_id, product_id, original_price, rn_min +FROM ( + + SELECT + vendor_id, + product_id, + original_price, + ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price ASC) as rn_min + + FROM vendor_inventory +) +WHERE rn_min = 1 \ No newline at end of file diff --git a/04_this_cohort/live_code/DC/module_4/module_4.sqbpro b/04_this_cohort/live_code/DC/module_4/module_4.sqbpro index 1a017f61b..14f953a1a 100644 --- a/04_this_cohort/live_code/DC/module_4/module_4.sqbpro +++ b/04_this_cohort/live_code/DC/module_4/module_4.sqbpro @@ -1,4 +1,4 @@ -
/* MODULE 4 */ +/* MODULE 4 */ /* NULL Management */ @@ -77,7 +77,7 @@ FROM budgets https://learn.microsoft.com/en-us/sql/t-sql/language-elements/nullif-transact-sql?view=sql-server-ver17 */ -/* MODULE 4 */ +/* MODULE 4 */ /* Windowed functions: row_number */ @@ -132,7 +132,7 @@ GROUP BY vendor_id--,product_id ) x WHERE x.sales_rank = 1 -ORDER BY cost desc/* MODULE 4 */ +ORDER BY cost desc/* MODULE 4 */ /* Windowed functions: dense_rank, rank, row_number */ @@ -169,7 +169,7 @@ SELECT * FROM row_rank_dense -/* MODULE 4 */ +/* MODULE 4 */ /* Windowed functions: NTILE */ @@ -196,7 +196,7 @@ FROM ( ON v.vendor_id = cp.vendor_id GROUP BY cp.market_date, v.vendor_id -) x/* MODULE 4 */ +) x/* MODULE 4 */ /* String Manipulations */ @@ -272,55 +272,4 @@ SELECT INSTR('FirstWord, SecondWord, ThirdWord',',')+1)) ,',') + INSTR('FirstWord, SecondWord, ThirdWord',',')+1) AS ThirdDelim -/* MODULE 4 */ -/* UNION */ - -/* 1. Find the most and least expensive product by vendor with UNION (and row_number!) */ - -/* MODULE 4 */ -/* UNION */ - -/* 1. Emulate a FULL OUTER JOIN with a UNION */ -DROP TABLE IF EXISTS temp.store1; -CREATE TEMP TABLE IF NOT EXISTS temp.store1 -( -costume TEXT, -quantity INT -); - -INSERT INTO temp.store1 -VALUES("tiger",6), - ("elephant",2), - ("princess", 4); - - -DROP TABLE IF EXISTS temp.store2; -CREATE TEMP TABLE IF NOT EXISTS temp.store2 -( -costume TEXT, -quantity INT -); - -INSERT INTO temp.store2 -VALUES("tiger",2), - ("dancer",7), - ("superhero", 5);/* MODULE 4 */ -/* INTERSECT & EXCEPT */ - -/* 1. Find products that have been sold (e.g. are in customer purchases AND product) */ - - - -/* 2. Find products that have NOT been sold (e.g. are NOT in customer purchases even though in product) */ - - - -/* 3. Directions matter... if we switch the order here: -products that do not exist, because no products purchased are NOT in the product table (e.g. are NOT in product even though in customer purchases)*/ - - - -/* 4. 
We can remake the intersect with a WHERE subquery for more details ... */ - - - +-- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_4/UNION_UNION_ALL.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_4/FULL_OUTER_JOIN_WITH_UNION.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_4/INTERSECT_EXCEPT.sql" (not supported by this version) -- diff --git a/04_this_cohort/live_code/DC/module_5/CROSS_JOIN.sql b/04_this_cohort/live_code/DC/module_5/CROSS_JOIN.sql new file mode 100644 index 000000000..5ecd918e5 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/CROSS_JOIN.sql @@ -0,0 +1,24 @@ +/* MODULE 5 */ +/* CROSS JOIN */ + + +/* 1. CROSS JOIN sizes with product*/ + +DROP TABLE IF EXISTS TEMP.sizes; +CREATE TEMP TABLE IF NOT EXISTS TEMP.sizes (size TEXT); + +INSERT INTO TEMP.sizes +VALUES('small'), +('medium'), +('large'); + +SELECT * FROM TEMP.sizes; + +SELECT product_name, product_qty_type, size +FROM product -- 23 rows +CROSS JOIN temp.sizes -- 3 rows +-- 3*23 = 69 rows for the cartesian product +--WHERE product_qty_type = 'unit' -- maybe makes more sense, but reduces the number of rows + + + diff --git a/04_this_cohort/live_code/DC/module_5/DYNAMIC_VIEW.sql b/04_this_cohort/live_code/DC/module_5/DYNAMIC_VIEW.sql new file mode 100644 index 000000000..ddd0edae9 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/DYNAMIC_VIEW.sql @@ -0,0 +1,62 @@ +/* MODULE 5 */ +/* DYNAMIC VIEW */ + + +DROP VIEW IF EXISTS todays_vendor_daily_sales; +CREATE VIEW IF NOT EXISTS todays_vendor_daily_sales AS + + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md + INNER JOIN + (SELECT * FROM customer_purchases + UNION + SELECT * FROM 
new_customer_purchases + ) cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + WHERE md.market_date = DATE('now', 'localtime') -- if the timezone not set + + GROUP BY cp.market_date, v.vendor_id + + + + + + +/* spoilers below */ + + + + + + + + + + + + + + + + + +-- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING +-- 1) update new_customer_purchases to today +-- 2) add the union +-- 3) add the where statement +-- 4) update the market_date_info to include today + + + + diff --git a/04_this_cohort/live_code/DC/module_5/FIRST_VIEW.sql b/04_this_cohort/live_code/DC/module_5/FIRST_VIEW.sql new file mode 100644 index 000000000..7bedda654 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/FIRST_VIEW.sql @@ -0,0 +1,27 @@ +/* MODULE 5 */ +/* VIEW */ + +/* 1. Create a vendor daily sales view */ +DROP VIEW IF EXISTS vendor_daily_sales; +CREATE VIEW IF NOT EXISTS vendor_daily_sales AS + + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM market_date_info md + INNER JOIN customer_purchases cp + ON md.market_date = cp.market_date + INNER JOIN vendor v + ON cp.vendor_id = v.vendor_id + + GROUP BY cp.market_date, v.vendor_id + + + + diff --git a/04_this_cohort/live_code/DC/module_5/INSERT_UPDATE_DELETE.sql b/04_this_cohort/live_code/DC/module_5/INSERT_UPDATE_DELETE.sql new file mode 100644 index 000000000..0314fbf88 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/INSERT_UPDATE_DELETE.sql @@ -0,0 +1,32 @@ +/* MODULE 5 */ +/* INSERT UPDATE DELETE */ + + +DROP TABLE IF EXISTS temp.product_expanded; +CREATE TEMP TABLE product_expanded AS + SELECT * FROM product; + +--SELECT * FROM product_expanded + +/* 1. add a product to the temp table */ +--INSERT +INSERT INTO product_expanded +VALUES(24,'Almonds','1 lb',3,'lbs'); + + +/* 2. 
change the product_size for THAT product */ +--UPDATE +-- almonds to 1/2 kg +UPDATE product_expanded +SET product_size = '1/2 kg', product_qty_type = 'kg' +WHERE product_id = 24; + + +/* 3. delete the newly added product */ +DELETE FROM product_expanded +--SELECT * FROM product_expanded -- can help you determine you are looking at the right rows before running a deletion +WHERE product_id = 24; -- if you remove this, all data will be removed from the table + +SELECT * FROM product_expanded + + diff --git a/04_this_cohort/live_code/DC/module_5/SELF_JOIN.sql b/04_this_cohort/live_code/DC/module_5/SELF_JOIN.sql new file mode 100644 index 000000000..24fbbd48e --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/SELF_JOIN.sql @@ -0,0 +1,27 @@ +/* MODULE 5 */ +/* SELF JOIN */ + + +/* 1. Create a self-joining hierarchy */ + +DROP TABLE IF EXISTS TEMP.employees; +CREATE TEMP TABLE TEMP.employees +( +emp_id INT +,emp_name text +,mgr_id INT +); + +INSERT INTO TEMP.employees +VALUES(1,'Thomas',3) +,(2,'Niyaz', [3,4]) +,(3,'Rohan', NULL) +,(4, 'Jennie',3); + +SELECT * FROM TEMP.employees; + +SELECT e.emp_name, m.emp_name as mgr_name +FROM temp.employees e +LEFT JOIN temp.employees m + on e.mgr_id = m.emp_id + diff --git a/04_this_cohort/live_code/DC/module_5/UPDATE_DYNAMIC_VIEW.sql b/04_this_cohort/live_code/DC/module_5/UPDATE_DYNAMIC_VIEW.sql new file mode 100644 index 000000000..bb98b078c --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/UPDATE_DYNAMIC_VIEW.sql @@ -0,0 +1,25 @@ +/* MODULE 5 */ +/* UPDATE statements for view */ + + +/* 1. SET market_date equal to today for new_customer_purchases */ +UPDATE new_customer_purchases +SET market_date = DATE('now'); + + + +/* 2. Add today's info to the market_date_info + +we need to add +1. today's date +2. today's day +3. today's week number +4. 
today's year + +INSERT INTO market_date_info +VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); + +*/ + INSERT INTO market_date_info +VALUES('2025-11-11','Tuesday','46','2025','8:00 AM','2:00 PM','nothing interesting','Winter','0','3',0,1); + diff --git a/04_this_cohort/live_code/DC/module_5/VIEW_IN_A_QUERY.sql b/04_this_cohort/live_code/DC/module_5/VIEW_IN_A_QUERY.sql new file mode 100644 index 000000000..4d9c19634 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_5/VIEW_IN_A_QUERY.sql @@ -0,0 +1,20 @@ +/* MODULE 5 */ +/* VIEW in another query */ + +/* 1. Transform the daily sales view into a sales by vendor per week result */ + +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) + + +FROM vendor_daily_sales + +GROUP BY +market_year +,market_week +,vendor_name + + diff --git a/04_this_cohort/live_code/DC/module_5/module_5.sqbpro b/04_this_cohort/live_code/DC/module_5/module_5.sqbpro index f608313e0..0bb5b8cc6 100644 --- a/04_this_cohort/live_code/DC/module_5/module_5.sqbpro +++ b/04_this_cohort/live_code/DC/module_5/module_5.sqbpro @@ -1,147 +1 @@ -
/* MODULE 5 */ -/* INSERT UPDATE DELETE */ - - -DROP TABLE IF EXISTS temp.product_expanded; -CREATE TEMP TABLE product_expanded AS - SELECT * FROM product; - ---SELECT * FROM product_expanded - -/* 1. add a product to the temp table */ - - - -/* 2. change the product_size for THAT product */ - - - -/* 3. delete the newly added product *//* MODULE 5 */ -/* VIEW */ - -/* 1. Create a vendor daily sales view */ - - SELECT - md.market_date - ,market_day - ,market_week - ,market_year - ,vendor_name - ,SUM(quantity*cost_to_customer_per_qty) as sales - - - FROM market_date_info md - INNER JOIN customer_purchases cp - ON md.market_date = cp.market_date - INNER JOIN vendor v - ON cp.vendor_id = v.vendor_id - - GROUP BY cp.market_date, v.vendor_id; - -/* MODULE 5 */ -/* VIEW in another query */ - -/* 1. Transform the daily sales view into a sales by vendor per week result */ - - - -/* MODULE 5 */ -/* UPDATE statements for view */ - - -/* 1. SET market_date equal to today for new_customer_purchases */ - - - - -/* 2. Add today's info to the market_date_info - -we need to add -1. today's date -2. today's day -3. today's week number -4. today's year - -INSERT INTO market_date_info -VALUES('....','....','....','....','8:00 AM','2:00 PM','nothing interesting','Summer','25','28',0,0); - -*/ - -/* MODULE 5 */ -/* DYNAMIC VIEW */ - - - - - - - - - -/* spoilers below */ - - - - - - - - - - - - - - - - - --- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING --- 1) update new_customer_purchases to today --- 2) add the union --- 3) add the where statement --- 4) update the market_date_info to include today - - - - -/* MODULE 5 */ -/* CROSS JOIN */ - - -/* 1. CROSS JOIN sizes with product*/ - -DROP TABLE IF EXISTS TEMP.sizes; -CREATE TEMP TABLE IF NOT EXISTS TEMP.sizes (size TEXT); - -INSERT INTO TEMP.sizes -VALUES('small'), -('medium'), -('large'); - -SELECT * FROM TEMP.sizes; - - - -/* MODULE 5 */ -/* SELF JOIN */ - - -/* 1. 
Create a self-joining hierarchy */ - -DROP TABLE IF EXISTS TEMP.employees; -CREATE TEMP TABLE TEMP.employees -( -emp_id INT -,emp_name text -,mgr_id INT -); - -INSERT INTO TEMP.employees -VALUES(1,'Thomas',3) -,(2,'Niyaz', 4) -,(3,'Rohan', NULL) -,(4, 'Jennie',3); - -SELECT * FROM TEMP.employees; -
+
-- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/INSERT_UPDATE_DELETE.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/FIRST_VIEW.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/VIEW_IN_A_QUERY.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/UPDATE_DYNAMIC_VIEW.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/DYNAMIC_VIEW.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/CROSS_JOIN.sql" (not supported by this version) ---- Reference to file "/Users/thomas/Documents/GitHub/02-intro_sql/04_this_cohort/live_code/DC/module_5/SELF_JOIN.sql" (not supported by this version) --
diff --git a/04_this_cohort/live_code/DC/module_6/1nf.sql b/04_this_cohort/live_code/DC/module_6/1nf.sql new file mode 100644 index 000000000..f57239761 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_6/1nf.sql @@ -0,0 +1,18 @@ +--1nf +drop table if exists temp.hold; +CREATE TABLE temp.hold AS +SELECT DISTINCT +name, +OS, +SUBSTR(software, 1, INSTR(software,',')-1) AS s1, +SUBSTR(software,INSTR(software,',')+1, INSTR(SUBSTR(software, INSTR(software, ',')+1),',')-1) as s2, +SUBSTR(software,INSTR(SUBSTR(software,INSTR(software,',')+1),',')+INSTR(software,',')+1) as s3, +supervisor + +FROM skills; + +SELECT name,OS,s1 as software, supervisor FROM hold +UNION +SELECT name,OS,s2 as software, supervisor FROM hold +UNION +SELECT name,OS,s3 as software, supervisor FROM hold \ No newline at end of file diff --git a/04_this_cohort/live_code/DC/module_6/2nf.sql b/04_this_cohort/live_code/DC/module_6/2nf.sql new file mode 100644 index 000000000..234d98d9a --- /dev/null +++ b/04_this_cohort/live_code/DC/module_6/2nf.sql @@ -0,0 +1,52 @@ +-- 2nf +drop table if exists temp.student; +drop table if exists temp.supervisor; +drop table if exists temp.student_software; + +create temp table if not exists temp.supervisor +( +id INTEGER PRIMARY KEY AUTOINCREMENT, +name TEXT +); + +INSERT INTO temp.supervisor(name) +select distinct supervisor +from skills; + +create temp table if not exists temp.student +( +id INTEGER PRIMARY KEY AUTOINCREMENT, +name TEXT, +OS TEXT, +supervisor_id INTEGER, +CONSTRAINT "fk_supervisor_id" FOREIGN KEY ("supervisor_id") REFERENCES "supervisor" ("id") +) + +INSERT INTO student(name, OS, supervisor_id) +SELECT DISTINCT +h.name +,OS +,s.id AS supervisor_id + +FROM hold h +JOIN supervisor s + on h.supervisor = s.name + +CREATE TABLE temp.student_software AS +SELECT id, software + +FROM student s +JOIN ( + SELECT name,OS,s1 as software, supervisor FROM hold + UNION + SELECT name,OS,s2 as software, supervisor FROM hold + UNION + SELECT name,OS,s3 as 
software, supervisor FROM hold +) u +ON s.name = u.name + +--select * from student +--select * from supervisor +select * from student_software + + diff --git a/04_this_cohort/live_code/DC/module_6/3nf.sql b/04_this_cohort/live_code/DC/module_6/3nf.sql new file mode 100644 index 000000000..d5f037013 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_6/3nf.sql @@ -0,0 +1,37 @@ +--3nf +drop table if exists temp.OS; +drop table if exists temp.software; +create temp table if not exists temp.OS +( +OS_id INTEGER, +OS TEXT, +win_only TEXT +); + +insert into temp.OS +values(1,"win","TRUE"), + (2,"mac","FALSE"); + + +create temp table if not exists temp.software +( +software_id INTEGER PRIMARY KEY AUTOINCREMENT, +software TEXT, +win_only TEXT +); + +INSERT INTO temp.software(software, win_only) +SELECT DISTINCT software, win_only +FROM student_software s +CROSS JOIN ( + SELECT * FROM OS WHERE OS = 'mac' +); + +UPDATE software +SET win_only = 'TRUE' +WHERE software.software = ' MSSQL'; + +SELECT * FROM OS +--SELECT * FROM software + + diff --git a/04_this_cohort/live_code/DC/module_6/SQLite_and_python.ipynb b/04_this_cohort/live_code/DC/module_6/SQLite_and_python.ipynb new file mode 100644 index 000000000..9fddb799a --- /dev/null +++ b/04_this_cohort/live_code/DC/module_6/SQLite_and_python.ipynb @@ -0,0 +1,717 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "05e1dbf0", + "metadata": {}, + "source": [ + "# Connect to FarmersMarket.db" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f1d8cb62", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlite3\n", + "#set your location, slash direction will change for windows and mac\n", + "DB = '/Users/thomas/Documents/GitHub/02-intro_sql/05_src/sql/farmersmarket.db' \n", + "#establish your connection\n", + "conn = sqlite3.connect(DB, isolation_level=None,\n", + " detect_types=sqlite3.PARSE_COLNAMES)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + 
"id": "1204e343", + "metadata": {}, + "outputs": [], + "source": [ + "#run your query, use \"\\\" to allow line breaks\n", + "db_df = pd.read_sql_query(\"SELECT p.*,pc.product_category_name \\\n", + " FROM product p \\\n", + " JOIN product_category pc \\\n", + " ON p.product_category_id = pc.product_category_id\"\n", + " ,conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "5c7863ee-08cd-4095-b80a-61f82425bd2e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
product_idproduct_nameproduct_sizeproduct_category_idproduct_qty_typeproduct_category_name
01Habanero Peppers - Organicmedium1lbsFresh Fruits & Vegetables
12Jalapeno Peppers - Organicsmall1lbsFresh Fruits & Vegetables
23Poblano Peppers - Organiclarge1unitFresh Fruits & Vegetables
34Banana Peppers - Jar8 oz3unitPackaged Prepared Food
45Whole Wheat Bread1.5 lbs3unitPackaged Prepared Food
56Cut Zinnias Bouquetmedium5unitPlants & Flowers
67Apple Pie10\"3unitPackaged Prepared Food
79Sweet Potatoesmedium1lbsFresh Fruits & Vegetables
810Eggs1 dozen6unitEggs & Meat (Fresh or Frozen)
911Pork Chops1 lb6lbsEggs & Meat (Fresh or Frozen)
1012Baby Salad Lettuce Mix - Bag1/2 lb1unitFresh Fruits & Vegetables
1113Baby Salad Lettuce Mix1 lb1lbsFresh Fruits & Vegetables
1214Red PotatoesNone1NoneFresh Fruits & Vegetables
1315Red Potatoes - Small1NoneFresh Fruits & Vegetables
1416Sweet CornEar1unitFresh Fruits & Vegetables
1517Carrotssold by weight1lbsFresh Fruits & Vegetables
1618Carrots - Organicbunch1unitFresh Fruits & Vegetables
1719Farmer's Market Resuable Shopping Bagmedium7unitNon-Edible Products
1820Homemade Beeswax Candles6\"7unitNon-Edible Products
1921Organic Cherry Tomatoespint1unitFresh Fruits & Vegetables
2022Roma Tomatoesmedium1lbsFresh Fruits & Vegetables
2123Maple Syrup - Jar8 oz2unitPackaged Pantry Goods
228Cherry Pie10\"3unitPackaged Prepared Food
\n", + "
" + ], + "text/plain": [ + " product_id product_name product_size \\\n", + "0 1 Habanero Peppers - Organic medium \n", + "1 2 Jalapeno Peppers - Organic small \n", + "2 3 Poblano Peppers - Organic large \n", + "3 4 Banana Peppers - Jar 8 oz \n", + "4 5 Whole Wheat Bread 1.5 lbs \n", + "5 6 Cut Zinnias Bouquet medium \n", + "6 7 Apple Pie 10\" \n", + "7 9 Sweet Potatoes medium \n", + "8 10 Eggs 1 dozen \n", + "9 11 Pork Chops 1 lb \n", + "10 12 Baby Salad Lettuce Mix - Bag 1/2 lb \n", + "11 13 Baby Salad Lettuce Mix 1 lb \n", + "12 14 Red Potatoes None \n", + "13 15 Red Potatoes - Small \n", + "14 16 Sweet Corn Ear \n", + "15 17 Carrots sold by weight \n", + "16 18 Carrots - Organic bunch \n", + "17 19 Farmer's Market Resuable Shopping Bag medium \n", + "18 20 Homemade Beeswax Candles 6\" \n", + "19 21 Organic Cherry Tomatoes pint \n", + "20 22 Roma Tomatoes medium \n", + "21 23 Maple Syrup - Jar 8 oz \n", + "22 8 Cherry Pie 10\" \n", + "\n", + " product_category_id product_qty_type product_category_name \n", + "0 1 lbs Fresh Fruits & Vegetables \n", + "1 1 lbs Fresh Fruits & Vegetables \n", + "2 1 unit Fresh Fruits & Vegetables \n", + "3 3 unit Packaged Prepared Food \n", + "4 3 unit Packaged Prepared Food \n", + "5 5 unit Plants & Flowers \n", + "6 3 unit Packaged Prepared Food \n", + "7 1 lbs Fresh Fruits & Vegetables \n", + "8 6 unit Eggs & Meat (Fresh or Frozen) \n", + "9 6 lbs Eggs & Meat (Fresh or Frozen) \n", + "10 1 unit Fresh Fruits & Vegetables \n", + "11 1 lbs Fresh Fruits & Vegetables \n", + "12 1 None Fresh Fruits & Vegetables \n", + "13 1 None Fresh Fruits & Vegetables \n", + "14 1 unit Fresh Fruits & Vegetables \n", + "15 1 lbs Fresh Fruits & Vegetables \n", + "16 1 unit Fresh Fruits & Vegetables \n", + "17 7 unit Non-Edible Products \n", + "18 7 unit Non-Edible Products \n", + "19 1 unit Fresh Fruits & Vegetables \n", + "20 1 lbs Fresh Fruits & Vegetables \n", + "21 2 unit Packaged Pantry Goods \n", + "22 3 unit Packaged Prepared Food " + ] + }, + 
"execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "db_df" + ] + }, + { + "cell_type": "markdown", + "id": "8b7c36c0", + "metadata": {}, + "source": [ + "Export the query:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ee17555e", + "metadata": {}, + "outputs": [], + "source": [ + "#save\n", + "db_df.to_csv('database-py.CSV', index=False)" + ] + }, + { + "cell_type": "markdown", + "id": "ed14b573", + "metadata": {}, + "source": [ + "# Run a SQL query with pandasql" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ac82fb05", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install pandasql" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4f783bd4", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pandasql as sql #this allows us to run SQLite queries!\n", + "p = \"https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv\"\n", + "penguins = pd.read_csv(p) #create a dataframe\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "7892f454", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
speciesislandbill_length_mmbill_depth_mmflipper_length_mmbody_mass_gsexyear
0AdelieTorgersen39.118.7181.03750.0male2007
1AdelieTorgersen39.517.4186.03800.0female2007
2AdelieTorgersen40.318.0195.03250.0female2007
3AdelieTorgersenNaNNaNNaNNaNNaN2007
4AdelieTorgersen36.719.3193.03450.0female2007
...........................
339ChinstrapDream55.819.8207.04000.0male2009
340ChinstrapDream43.518.1202.03400.0female2009
341ChinstrapDream49.618.2193.03775.0male2009
342ChinstrapDream50.819.0210.04100.0male2009
343ChinstrapDream50.218.7198.03775.0female2009
\n", + "

344 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", + "0 Adelie Torgersen 39.1 18.7 181.0 \n", + "1 Adelie Torgersen 39.5 17.4 186.0 \n", + "2 Adelie Torgersen 40.3 18.0 195.0 \n", + "3 Adelie Torgersen NaN NaN NaN \n", + "4 Adelie Torgersen 36.7 19.3 193.0 \n", + ".. ... ... ... ... ... \n", + "339 Chinstrap Dream 55.8 19.8 207.0 \n", + "340 Chinstrap Dream 43.5 18.1 202.0 \n", + "341 Chinstrap Dream 49.6 18.2 193.0 \n", + "342 Chinstrap Dream 50.8 19.0 210.0 \n", + "343 Chinstrap Dream 50.2 18.7 198.0 \n", + "\n", + " body_mass_g sex year \n", + "0 3750.0 male 2007 \n", + "1 3800.0 female 2007 \n", + "2 3250.0 female 2007 \n", + "3 NaN NaN 2007 \n", + "4 3450.0 female 2007 \n", + ".. ... ... ... \n", + "339 4000.0 male 2009 \n", + "340 3400.0 female 2009 \n", + "341 3775.0 male 2009 \n", + "342 4100.0 male 2009 \n", + "343 3775.0 female 2009 \n", + "\n", + "[344 rows x 8 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "penguins" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8036d336", + "metadata": {}, + "outputs": [], + "source": [ + "yrly_penguins = sql.sqldf('''SELECT DISTINCT year, COUNT(*) AS count, \n", + " SUM(COUNT(*)) OVER (ORDER BY year) AS running_total\n", + " FROM penguins\n", + " GROUP BY year''') #run a SQLite query with sqldf()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "80fd4dd6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearcountrunning_total
02007110110
12008114224
22009120344
\n", + "
" + ], + "text/plain": [ + " year count running_total\n", + "0 2007 110 110\n", + "1 2008 114 224\n", + "2 2009 120 344" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "yrly_penguins" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd3de3f-fb4f-46ac-ad42-23971226e5d0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/04_this_cohort/live_code/DC/module_6/denormalized.sql b/04_this_cohort/live_code/DC/module_6/denormalized.sql new file mode 100644 index 000000000..da4208587 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_6/denormalized.sql @@ -0,0 +1,14 @@ +-- normal forms creation + +drop table if exists temp.skills; +create temp table if not exists temp.skills +( +name TEXT, +OS TEXT, +software TEXT, +supervisor TEXT +); + +insert into temp.skills +values("A","win","VSCode, MSSQL, RStudio", "Eric Yu"), + ("Thomas","mac", "Spyder, SQLite, RStudio", "Rohan Alexander"); diff --git a/04_this_cohort/live_code/DC/module_6/penguins_in_python_sql.sql b/04_this_cohort/live_code/DC/module_6/penguins_in_python_sql.sql new file mode 100644 index 000000000..2326c1d29 --- /dev/null +++ b/04_this_cohort/live_code/DC/module_6/penguins_in_python_sql.sql @@ -0,0 +1,9 @@ +select * from penguins; + +-- how many penguins were identified each year +SELECT DISTINCT year +,COUNT(*) AS count +,SUM(COUNT(*)) OVER (ORDER BY year) AS running_total + + FROM penguins +GROUP BY year \ No newline at end of file diff --git a/05_src/sql/farmersmarket.db 
b/05_src/sql/farmersmarket.db index 4720f2483..f846637d1 100644 Binary files a/05_src/sql/farmersmarket.db and b/05_src/sql/farmersmarket.db differ diff --git a/README.md b/README.md index 4159c348b..8696ed949 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ Participants should review the [Assignment Submission Guide](https://github.com/ There are two assignments. -🚨ATTENTION! During November 2025, both the SGS DC Cohort and Cohort 8 are running in parallel. Please navigate to the appropriate folder for your cohort as **the assignment are not the same**. +🚨ATTENTION! During November 2025, both the SGS DC Cohort and Cohort 8 are running in parallel. Please navigate to the appropriate folder for your cohort as **the assignments are not the same**. ### DC 1. [Assignment 1](./02_activities/assignments/DC_Cohort/Assignment1.md) @@ -103,7 +103,7 @@ There are two assignments. This module will include live learning sessions and optional, asynchronous work periods. During live learning sessions, the Technical Facilitator will introduce and explain key concepts and demonstrate core skills. Learning is facilitated during this time. Before and after each live learning session, the instructional team will be available for questions related to the core concepts of the module. Optional work periods are to be used to seek help from peers, the Learning Support team, and to work through the assignments in the learning module, with access to live help. Content is not facilitated, but rather this time should be driven by participants. We encourage participants to come to these work periods with questions and problems to work through.   -Participants are encouraged to engage actively during the learning module. They key to developing the core skills in each learning module is through practice. The more participants engage in coding along with the instructional team, and applying the skills in each module, the more likely it is that these skills will solidify. 
+Participants are encouraged to engage actively during the learning module. The key to developing the core skills in each learning module is through practice. The more participants engage in coding along with the instructional team, and applying the skills in each module, the more likely it is that these skills will solidify. This module will use a dedicated _Etherboard_ for student collaboration. The link will be provided in the first class. New content will be added by the Technical Facilitator before each session. Each session will consist of slides to introduce topics, live coding to demonstrate the topics, and occasional breakout rooms/live polls to reinforce the topics. @@ -147,7 +147,7 @@ Before First Live Learning Session: Install & Pre-Session [Setup](./05_src/sql/s * Participants are encouraged to ask questions, and collaborate with others to enhance their learning experience. * Participants must have a computer and an internet connection to participate in online activities. * Participants must not use generative AI such as ChatGPT to generate code in order to complete assignments. It should be used as a supportive tool to seek out answers to questions you may have. -* We expect Participants to have completed the instructions mentioned in the [onboarding repo](https://github.com/UofT-DSI/onboarding/blob/main/environment_setup/README.md). +* We expect participants to have completed the instructions mentioned in the [onboarding repo](https://github.com/UofT-DSI/onboarding/blob/main/environment_setup/README.md). * We encourage participants to default to having their camera on at all times, and turning the camera off only as needed. This will greatly enhance the learning experience for all participants and provides real-time feedback for the instructional team. 
## Resources @@ -189,8 +189,8 @@ Before First Live Learning Session: Install & Pre-Session [Setup](./05_src/sql/s * **activities**: Contains self-assessments, graded assignments, and rubrics for evaluating assignments. * **instructional_team**: Resources for the instructional team. * **this_cohort**: Additional materials and resources for this cohort. -* **src**: Source code, databases, logs, and required dependencies (requirements.txt) needed during the module. -* **.gitignore**: Files to exclude from this folder, specified by the Technical Facilitator +* **src**: Source code, databases, logs, and required dependencies needed during the module. +* **.gitignore**: Files to exclude from this repository, specified by the Technical Facilitator. * **LICENSE**: The license for this repository. * **SETUP.md**: Contains the instructions for following the steps required to complete the SQL onboarding tasks. * **README.md**: This file.