-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsql_analysis.sql
More file actions
213 lines (172 loc) · 5.75 KB
/
sql_analysis.sql
File metadata and controls
213 lines (172 loc) · 5.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# Creating a dedicated user for your Python app
-- CREATE USER 'pyuser'@'localhost'
-- IDENTIFIED WITH mysql_native_password BY 'py_password';
-- GRANT ALL PRIVILEGES ON customer_behaviour.* TO 'pyuser'@'localhost';
-- FLUSH PRIVILEGES;
# ----------------------------------------------------------------------------------------
# Checking Data:
-- SELECT * FROM customer_purchases LIMIT 10
# ----------------------------------------------------------------------------------------
# Answering Some Important Questions:
-- Q1. What is the total revenue generated by male vs. female customers ?
SELECT
gender AS Gender,
SUM(purchase_amount) AS Revenue
FROM customer_purchases
GROUP BY gender;
-- Q2. Which are the top 5 Product with the highest average review rating ?
SELECT
item_purchased AS Product,
ROUND(AVG(review_rating), 2) AS Avg_Product_Rating
FROM customer_purchases
GROUP BY item_purchased
ORDER BY AVG(review_rating) DESC
LIMIT 5;
-- Q3. What are the top 10 best-selling products?
SELECT
Item_Purchased AS Products,
COUNT(item_purchased) AS Volume
FROM Customer_Purchases
GROUP BY Item_Purchased
ORDER BY Volume DESC
LIMIT 10;
-- Q4. Which is top category by revenue and % of contribution to the total revenue?
WITH Category_Rev AS (
SELECT
category,
SUM(purchase_amount) AS category_revenue
FROM customer_purchases
GROUP BY category
),
Total_Rev AS (
SELECT SUM(purchase_amount) AS total_revenue
FROM customer_purchases
)
SELECT
c.category,
c.category_revenue,
ROUND((c.category_revenue / t.total_revenue) * 100, 2) AS revenue_pct
FROM Category_Rev c
CROSS JOIN Total_Rev t
ORDER BY c.category_revenue DESC;
-- Q5. Compare the average Purchase Amounts between Standard and Express Shipping.
SELECT
shipping_type AS Shipping_Type,
ROUND(AVG(purchase_amount), 2) AS Avg_Purchase_Amount
FROM customer_purchases
WHERE shipping_type IN ('Standard', 'Express')
GROUP BY shipping_type;
-- Q6. Do subscribed customers spend more ? Compare average spend and total revenue between
-- subscribers and non-subscribers.
SELECT
CASE WHEN subscription_status = 'Yes' THEN 'Subscribers'
ELSE 'Non-Subscribers'
END AS Customer_Status,
COUNT(customer_id) AS Total_Customers,
ROUND(AVG(purchase_amount), 2) AS Average_Spend,
ROUND(SUM(purchase_amount), 2) AS Total_Revenue
FROM customer_purchases
GROUP BY Customer_Status
ORDER BY Total_Revenue DESC, Average_Spend DESC;
-- Q7. Which 5 products have the highest percentage of purchases with discounts applied?
SELECT
item_purchased AS Product,
ROUND(SUM(CASE WHEN discount_applied = 'Yes' THEN 1 ELSE 0 END)/COUNT(*) * 100, 2) AS Purchase_Percentage
FROM customer_purchases
GROUP BY item_purchased
ORDER BY Purchase_Percentage DESC
LIMIT 5;
-- Q8. Segment customers into New, Returning and Loyal based on their total no. of previous
-- purchases, and show the count of each segment. Also, their contribution to the revenue.
WITH customer_type AS (
SELECT
customer_id,
previous_purchases,
SUM(purchase_amount) AS Revenue,
CASE WHEN previous_purchases <= 1 THEN 'New'
WHEN previous_purchases < 10 THEN 'Returning'
ELSE 'Loyal'
END AS Customer_Segment
FROM customer_purchases
GROUP BY customer_id, previous_purchases
)
SELECT
Customer_Segment,
COUNT(*) AS Number_of_Customers,
SUM(Revenue) AS Total_Revenue
FROM customer_type
GROUP BY Customer_Segment
ORDER BY Number_of_Customers DESC, Total_Revenue DESC;
-- Q9. What are the top 3 most purchased products within each category ?
WITH item_counts as (
SELECT
Category,
Item_Purchased AS Product,
COUNT(customer_id) AS Total_Orders,
ROW_NUMBER() OVER (PARTITION BY Category ORDER BY COUNT(customer_id) DESC) AS Item_Rank
FROM customer_purchases
GROUP BY Category, item_purchased)
SELECT
Item_Rank,
Category,
Product
Total_Orders
FROM item_counts
WHERE item_rank <= 3;
-- Q10. Are customers who are repeat buyers (more than 5 previous purchases) also likely to subscribe?
SELECT
Subscription_Status,
COUNT(Customer_ID) AS Repeat_Buyers
FROM customer_purchases
WHERE previous_purchases > 5
GROUP BY subscription_status;
-- Q11. What is the revenue contribution and Average Order Value (AOV) of each age group ?
SELECT
Age_Group,
SUM(Purchase_Amount) AS Total_Revenue,
ROUND(SUM(Purchase_Amount)/COUNT(*), 2) AS Average_Order_Value
FROM customer_purchases
GROUP BY age_group
ORDER BY Total_Revenue DESC, Average_Order_Value DESC ;
-- Q12. Which product categories show the highest discount dependency
-- (% of category revenue that came from discounted items)?
SELECT
Category,
SUM(CASE WHEN discount_applied = 'Yes' THEN purchase_amount ELSE 0 END)
/ SUM(purchase_amount) AS Discount_Dependency_Ratio
FROM customer_purchases
GROUP BY category
ORDER BY discount_dependency_ratio DESC;
-- Q.13 Which top 10 locations (states) contribute the highest revenue and which underperform?
SELECT
Location,
SUM(Purchase_Amount) AS Total_Revenue,
RANK() OVER (ORDER BY SUM(Purchase_Amount) DESC) AS Revenue_Rank
FROM customer_purchases
GROUP BY Location
ORDER BY Total_Revenue DESC
LIMIT 10;
-- Q.14 Which shipping types result in the highest review ratings (customer satisfaction)?
SELECT
Shipping_Type,
ROUND(AVG(Review_Rating), 2) AS Avg_Review_Rating
FROM customer_purchases
GROUP BY shipping_type
ORDER BY Avg_Review_Rating DESC;
-- Q.15 Which seasons generate the highest revenue for each category?
WITH Season_Rev AS
(SELECT
Category,
Season,
SUM(Purchase_Amount) AS Total_Revenue,
ROW_NUMBER() OVER (PARTITION BY Category ORDER BY SUM(Purchase_Amount) DESC) AS Revenue_Rank
FROM customer_purchases
GROUP BY Season, Category
)
SELECT
Category,
Season,
Total_Revenue
FROM Season_Rev
WHERE Revenue_Rank = 1
ORDER BY Total_Revenue DESC;