-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalytics.sql
More file actions
153 lines (144 loc) · 4.38 KB
/
analytics.sql
File metadata and controls
153 lines (144 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
-- Retail Revenue Optimization: Analytics Queries (PostgreSQL)
-- Tips:
-- - Returns/cancellations are typically invoice_no LIKE 'C%' and/or negative quantities.
-- - Many analyses below provide both "gross" and "net" views.
-- 0) Quick sanity checks
SELECT COUNT(*) AS rows_in_fact FROM fact_sales;
SELECT
COUNT(*) FILTER (WHERE invoice_no LIKE 'C%') AS cancellation_rows,
COUNT(*) FILTER (WHERE quantity < 0) AS negative_qty_rows,
COUNT(*) FILTER (WHERE customer_key IS NULL) AS missing_customer_rows
FROM fact_sales;
-- 1) Monthly revenue trend (gross vs net)
WITH base AS (
SELECT
date_trunc('month', invoice_date)::date AS month,
SUM(gross_sales) AS gross_revenue,
SUM(CASE WHEN invoice_no LIKE 'C%' OR quantity < 0 THEN gross_sales ELSE 0 END) AS returns_value
FROM fact_sales
GROUP BY 1
)
SELECT
month,
gross_revenue,
returns_value,
(gross_revenue + returns_value) AS net_revenue
FROM base
ORDER BY month;
-- 2) Top 20 products by net revenue (exclude cancellations)
SELECT
p.stock_code,
MAX(p.description) AS description,
SUM(f.gross_sales) AS net_revenue
FROM fact_sales f
JOIN dim_product p ON p.product_key = f.product_key
WHERE f.invoice_no NOT LIKE 'C%' AND f.quantity > 0
GROUP BY 1
ORDER BY net_revenue DESC
LIMIT 20;
-- 3) Countries by net revenue + AOV (average order value)
WITH orders AS (
SELECT
invoice_no,
MIN(invoice_date) AS order_date,
MAX(c.country) AS country,
SUM(gross_sales) AS order_value
FROM fact_sales f
LEFT JOIN dim_customer c ON c.customer_key = f.customer_key
WHERE f.invoice_no NOT LIKE 'C%' AND f.quantity > 0
GROUP BY 1
)
SELECT
country,
COUNT(*) AS orders,
ROUND(AVG(order_value)::numeric, 2) AS avg_order_value,
ROUND(SUM(order_value)::numeric, 2) AS net_revenue
FROM orders
GROUP BY 1
ORDER BY net_revenue DESC;
-- 4) Customer RFM segmentation (Recency, Frequency, Monetary)
-- Recency measured from the dataset max order date.
WITH orders AS (
SELECT
COALESCE(c.customer_id, -1) AS customer_id,
MAX(f.invoice_date)::date AS last_order_date,
COUNT(DISTINCT f.invoice_no) AS frequency,
SUM(CASE WHEN f.invoice_no NOT LIKE 'C%' AND f.quantity > 0 THEN f.gross_sales ELSE 0 END) AS monetary
FROM fact_sales f
LEFT JOIN dim_customer c ON c.customer_key = f.customer_key
GROUP BY 1
),
anchor AS (SELECT MAX(invoice_date)::date AS max_date FROM fact_sales),
rfm AS (
SELECT
o.*,
(SELECT max_date FROM anchor) - o.last_order_date AS recency_days
FROM orders o
WHERE customer_id <> -1
),
scored AS (
SELECT
*,
NTILE(4) OVER (ORDER BY recency_days ASC) AS r_score, -- lower recency_days => better
NTILE(4) OVER (ORDER BY frequency DESC) AS f_score,
NTILE(4) OVER (ORDER BY monetary DESC) AS m_score
FROM rfm
)
SELECT
CONCAT(r_score, f_score, m_score) AS rfm_cell,
COUNT(*) AS customers,
ROUND(AVG(monetary)::numeric, 2) AS avg_monetary
FROM scored
GROUP BY 1
ORDER BY customers DESC;
-- 5) Return rate by month (value-based)
WITH monthly AS (
SELECT
date_trunc('month', invoice_date)::date AS month,
SUM(CASE WHEN invoice_no NOT LIKE 'C%' AND quantity > 0 THEN gross_sales ELSE 0 END) AS sales_value,
ABS(SUM(CASE WHEN invoice_no LIKE 'C%' OR quantity < 0 THEN gross_sales ELSE 0 END)) AS return_value
FROM fact_sales
GROUP BY 1
)
SELECT
month,
sales_value,
return_value,
ROUND((return_value / NULLIF(sales_value,0))::numeric, 4) AS return_rate
FROM monthly
ORDER BY month;
-- 6) Basket size distribution (items per order)
WITH order_items AS (
SELECT
invoice_no,
SUM(CASE WHEN invoice_no NOT LIKE 'C%' AND quantity > 0 THEN quantity ELSE 0 END) AS items
FROM fact_sales
GROUP BY 1
)
SELECT
CASE
WHEN items = 0 THEN '0 (all returns)'
WHEN items BETWEEN 1 AND 5 THEN '1-5'
WHEN items BETWEEN 6 AND 10 THEN '6-10'
WHEN items BETWEEN 11 AND 20 THEN '11-20'
ELSE '21+'
END AS basket_bucket,
COUNT(*) AS orders
FROM order_items
GROUP BY 1
ORDER BY 1;
-- 7) Price sensitivity proxy: revenue by unit_price band
WITH banded AS (
SELECT
CASE
WHEN unit_price < 1 THEN '<$1'
WHEN unit_price < 2 THEN '$1-$1.99'
WHEN unit_price < 5 THEN '$2-$4.99'
WHEN unit_price < 10 THEN '$5-$9.99'
ELSE '$10+'
END AS price_band,
SUM(CASE WHEN invoice_no NOT LIKE 'C%' AND quantity > 0 THEN gross_sales ELSE 0 END) AS net_revenue
FROM fact_sales
GROUP BY 1
)
SELECT * FROM banded ORDER BY net_revenue DESC;