-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWalmart_Sales_SQL_Quries.sql
More file actions
344 lines (258 loc) · 6.42 KB
/
Walmart_Sales_SQL_Quries.sql
File metadata and controls
344 lines (258 loc) · 6.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
-- Create database
CREATE DATABASE IF NOT EXISTS walmartSales;
---------- Create table ----------
CREATE TABLE IF NOT EXISTS sales(
invoice_id VARCHAR(30) NOT NULL PRIMARY KEY,
branch VARCHAR(5) NOT NULL,
city VARCHAR(30) NOT NULL,
customer_type VARCHAR(30) NOT NULL,
gender VARCHAR(30) NOT NULL,
product_line VARCHAR(100) NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
quantity INT NOT NULL,
tax_pct FLOAT(6,4) NOT NULL,
total DECIMAL(12, 4) NOT NULL,
date DATETIME NOT NULL,
time TIME NOT NULL,
payment VARCHAR(15) NOT NULL,
cogs DECIMAL(10,2) NOT NULL,
gross_margin_pct FLOAT(11,9),
gross_income DECIMAL(12, 4),
rating FLOAT(2, 1)
);
---------- Feature Engineering ----------
-- Add the time_of_day column --
SELECT
time,
(CASE
WHEN `time` BETWEEN "00:00:00" AND "12:00:00" THEN "Morning"
WHEN `time` BETWEEN "12:01:00" AND "16:00:00" THEN "Afternoon"
ELSE "Evening"
END) AS time_of_day
FROM sales;
ALTER TABLE sales ADD COLUMN time_of_day VARCHAR(20);
UPDATE sales
SET time_of_day = (
CASE
WHEN `time` BETWEEN "00:00:00" AND "12:00:00" THEN "Morning"
WHEN `time` BETWEEN "12:01:00" AND "16:00:00" THEN "Afternoon"
ELSE "Evening"
END
);
-- Add day_name column --
SELECT
date,
DAYNAME(date)
FROM sales;
ALTER TABLE sales ADD COLUMN day_name VARCHAR(10);
UPDATE sales
SET day_name = DAYNAME(date);
-- Add month_name column --
SELECT
date,
MONTHNAME(date)
FROM sales;
ALTER TABLE sales ADD COLUMN month_name VARCHAR(10);
UPDATE sales
SET month_name = MONTHNAME(date);
---------------------------- Generic Questions ------------------------------
1. How many different cities are present in the dataset?
SELECT
DISTINCT city
FROM sales;
2. In which city is each branch situated?
SELECT
DISTINCT city,
branch
FROM sales;
---------------------------- Product Analysis -------------------------------
1. How many unique product lines does the data have?
SELECT
DISTINCT product_line
FROM sales;
2. What is the most common payment method?
SELECT
DISTINCT(payment_method) as payments,
COUNT(*) as times
FROM sales
GROUP BY payments
ORDER BY times DESC
3. What is the most selling product line
SELECT
SUM(quantity) as qty,
product_line
FROM sales
GROUP BY product_line
ORDER BY qty DESC;
4. What is the total revenue by month?
SELECT
month_name AS month,
SUM(total) AS total_revenue
FROM sales
GROUP BY month_name
ORDER BY total_revenue DESC;
5. Which month recorded the highest Cost of Goods Sold (COGS)?
SELECT
month_name AS month,
SUM(cogs) AS cogs
FROM sales
GROUP BY month_name
ORDER BY cogs DESC;
6. Which product line generated the highest revenue?
SELECT
product_line,
SUM(total) as total_revenue
FROM sales
GROUP BY product_line
ORDER BY total_revenue DESC;
7. Which city has the highest revenue?
SELECT
city,
SUM(total) AS total_revenue
FROM sales
GROUP BY city
ORDER BY total_revenue DESC;
8. Which product line incurred the highest VAT?
SELECT
product_line,
AVG(VAT) as avg_tax
FROM sales
GROUP BY product_line
ORDER BY avg_tax DESC;
9. Retrieve each product line and add a column product_category, indicating 'Good' or 'Bad,' based on whether its sales are above the average.
WITH product_avg_sales AS (SELECT
product_line,
AVG(total) as avg_sales
FROM sales
GROUP BY product_line),
overall_avg_sales AS (
SELECT
AVG(total) as overall_avg FROM sales)
SELECT
p.product_line,
p.avg_sales,
CASE
WHEN p.avg_sales > o.overall_avg THEN 'Good'
WHEN p.avg_sales < o.overall_avg THEN 'Bad' END AS rating
FROM product_avg_sales as p
JOIN overall_avg_sales as o ;
10. Which branch sold more products than average product sold?
SELECT
product_line,
SUM(VAT) as total_VAT
FROM sales
GROUP BY product_line
ORDER BY total_VAT DESC
11. What is the most common product line by gender?
SELECT
gender,
product_line,
COUNT(gender) AS total_count
FROM sales
GROUP BY gender, product_line
ORDER BY total_count DESC;
12. What is the average rating of each product line?
SELECT
product_line,
ROUND(AVG(rating),2) as avg_rating
FROM sales
GROUP BY product_line
ORDER BY avg_rating DESC
---------------------------- Sales Analysis ---------------------------------
1. Number of sales made in each time of the day per weekday.
SELECT
COUNT(*) as sales_count,
time_of_day
FROM sales
WHERE day_name != 'Saturday' AND day_name != 'Sunday'
GROUP BY time_of_day
ORDER BY sales_count DESC
2. Identify the customer type that generates the highest revenue.
SELECT
customer_type,
SUM(total) as total_revenue
FROM sales
GROUP BY customer_type
ORDER BY total_revenue DESC
3. Which city has the largest tax percent/ VAT (Value Added Tax)?
SELECT
city,
ROUND(AVG(VAT),2) as avg_vat
FROM sales
GROUP BY city
ORDER BY avg_vat DESC
4. Which customer type pays the most VAT?
SELECT
customer_type,
ROUND(AVG(VAT),2) as avg_vat
FROM sales
GROUP BY customer_type
ORDER BY avg_vat DESC
-------------------------- Customer Analysis -------------------------------
1. How many unique customer types does the data have?
SELECT
DISTINCT(customer_type) as unique_customers
from sales
2. How many unique payment methods does the data have?
SELECT
DISTINCT (payment_method) as unique_payment_method
FROM sales;
3. Which is the most common customer type?
SELECT
customer_type,
COUNT(*) as count
FROM sales
GROUP BY customer_type
ORDER BY count DESC
LIMIT 1;
4. Which customer type buys the most?
SELECT
customer_type,
COUNT(*) as count
FROM sales
GROUP BY customer_type
ORDER BY count DESC
5. What is the gender of most of the customers?
SELECT
gender,
COUNT(*) as gender_count
FROM sales
GROUP BY gender
ORDER BY gender_count DESC
6. What is the gender distribution per branch?
SELECT
branch,
gender,
COUNT(*) as gender_count
FROM sales
GROUP BY branch, gender
7. Which time of the day do customers give most ratings?
SELECT
time_of_day,
AVG(rating) as avg_rating
FROM sales
GROUP BY time_of_day
ORDER BY avg_rating DESC
8. Which time of the day do customers give most ratings per branch?
SELECT
branch,
time_of_day,
AVG(rating) as avg_rating
FROM sales
GROUP BY branch, time_of_day
ORDER BY avg_rating DESC
9. Which day of the week has the best avg ratings?
SELECT
day_name,
AVG(rating) as avg_rating
FROM sales
GROUP BY day_name
ORDER BY avg_rating DESC
10. Which day of the week has the best average ratings per branch?
SELECT
day_name,
branch,
AVG(rating) as avg_rating
FROM sales
GROUP BY day_name,branch
ORDER BY avg_rating DESC