# sql_queries.py
# SQL statements (DROP / CREATE / INSERT / SELECT) for the users, songs,
# artists, time and songplays tables.
# DROP TABLES
# All five statements have the same shape, so they are produced from a single
# template; only the table name varies.
_DROP_TEMPLATE = "DROP TABLE IF EXISTS {}"

user_table_drop = _DROP_TEMPLATE.format("users")
song_table_drop = _DROP_TEMPLATE.format("songs")
artist_table_drop = _DROP_TEMPLATE.format("artists")
time_table_drop = _DROP_TEMPLATE.format("time")
songplay_table_drop = _DROP_TEMPLATE.format("songplays")
# CREATE TABLES
# users: keyed by user_id; first_name and level are mandatory, the remaining
# columns are nullable.
user_table_create = (
    "CREATE TABLE IF NOT EXISTS users (\n"
    "user_id int PRIMARY KEY,\n"
    "first_name text NOT NULL,\n"
    "last_name text,\n"
    "gender text,\n"
    "level text NOT NULL); "
)
# songs: keyed by song_id; title is mandatory. artist_id is stored as plain
# text here (no foreign key is declared on it).
song_table_create = (
    "CREATE TABLE IF NOT EXISTS songs (\n"
    "song_id text PRIMARY KEY,\n"
    "title text NOT NULL,\n"
    "artist_id text,\n"
    "year int,\n"
    "duration decimal ); "
)
# artists: keyed by artist_id; name is mandatory, location and coordinates
# are nullable.
artist_table_create = (
    "CREATE TABLE IF NOT EXISTS artists (\n"
    "artist_id text PRIMARY KEY,\n"
    "name text NOT NULL,\n"
    "location text,\n"
    "latitude float,\n"
    "longitude float\n"
    "); "
)
# time: keyed by the start_time timestamp, with its calendar parts stored as
# separate integer columns.
time_table_create = (
    "CREATE TABLE IF NOT EXISTS time (\n"
    "start_time timestamp PRIMARY KEY,\n"
    "hour int,\n"
    "day int,\n"
    "week int,\n"
    "month int,\n"
    "year int,\n"
    "weekday int\n"
    "); "
)
# songplays: keyed by an auto-generated serial id. The four foreign keys
# reference users, songs, artists and time, so those tables have to exist
# before this statement runs. song_id and artist_id are nullable.
songplay_table_create = (
    "CREATE TABLE IF NOT EXISTS songplays (\n"
    "songplay_id serial PRIMARY KEY,\n"
    "start_time timestamp NOT NULL,\n"
    "user_id int NOT NULL,\n"
    "level text NOT NULL,\n"
    "song_id text,\n"
    "artist_id text,\n"
    "session_id int NOT NULL ,\n"
    "location text,\n"
    "user_agent text,\n"
    # Foreign-key constraints (each named after the column it guards,
    # except the start_time one, which is named "ts").
    "CONSTRAINT user_id\n"
    "FOREIGN KEY (user_id)\n"
    "REFERENCES users(user_id),\n"
    "CONSTRAINT song_id\n"
    "FOREIGN KEY (song_id)\n"
    "REFERENCES songs(song_id),\n"
    "CONSTRAINT artist_id\n"
    "FOREIGN KEY (artist_id)\n"
    "REFERENCES artists(artist_id),\n"
    "CONSTRAINT ts\n"
    "FOREIGN KEY (start_time)\n"
    "REFERENCES time(start_time)); "
)
# INSERT RECORDS
# songplays: eight positional parameters; songplay_id is omitted so the
# serial column generates it.
songplay_table_insert = (
    "INSERT INTO songplays (start_time, user_id, level, song_id, artist_id , session_id , location, user_agent)\n"
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"
)
# users: upsert. When the user_id already exists only `level` is refreshed
# from the incoming row; the other columns keep their stored values.
user_table_insert = (
    " INSERT INTO users (user_id, first_name, last_name, gender, level)\n"
    "VALUES (%s, %s, %s, %s, %s)\n"
    "ON CONFLICT (user_id)\n"
    "DO UPDATE SET level = EXCLUDED.level;"
)
# songs: a row whose song_id is already present is silently skipped.
song_table_insert = (
    " INSERT INTO songs (song_id, title, artist_id, year, duration)\n"
    "VALUES (%s,%s,%s,%s,%s)\n"
    "ON CONFLICT (song_id) DO NOTHING;\n"
)
# artists: a row whose artist_id is already present is silently skipped.
artist_table_insert = (
    "INSERT INTO artists (artist_id, name, location, latitude, longitude)\n"
    "VALUES (%s,%s,%s,%s,%s)\n"
    "ON CONFLICT (artist_id) DO NOTHING;\n"
)
# time: a row whose start_time is already present is silently skipped.
time_table_insert = (
    " INSERT INTO time (start_time, hour, day, week, month, year, weekday)\n"
    "VALUES (%s,%s,%s,%s,%s,%s,%s)\n"
    "ON CONFLICT (start_time) DO NOTHING;\n"
)
# FIND SONGS
# Looks up (song_id, artist_id) for an exact match on song title, artist name
# and song duration. Parameters, in order: title, artist name, duration.
song_select = (
    " SELECT song_id, a.artist_id FROM songs s\n"
    "JOIN\n"
    "artists a ON s.artist_id = a.artist_id\n"
    "WHERE\n"
    "s.title = %s AND a.name = %s AND s.duration = %s;\n"
)
# QUERY EXAMPLES
# Number of songplays per day of the week, busiest day first.
# NOTE(review): the labels assume time.weekday stores 0 for Sunday. Confirm
# against the code that fills the time table -- e.g. pandas `dt.weekday`
# numbers Monday as 0, which would shift every label by one day.
COUNT_WEEKDAY = (
    " SELECT case when weekday = 0 then 'Sunday'\n"
    "when weekday = 1 then 'Monday'\n"
    "when weekday = 2 then 'Tuesday'\n"
    "when weekday = 3 then 'Wednesday'\n"
    "when weekday = 4 then 'Thursday'\n"
    "when weekday = 5 then 'Friday'\n"
    "when weekday = 6 then 'Saturday' end as weekday,\n"
    "COUNT(songplay_id) songs\n"
    "FROM songplays s\n"
    "JOIN time t ON s.start_time = t.start_time\n"
    "GROUP BY 1\n"
    "ORDER BY songs DESC"
)
# Number of songplays per user gender.
# Only 'F' is tested explicitly: every other value, including NULL, falls
# into the ELSE branch and is reported as 'Man'.
AMOUNT_SONG_GENDER = (
    " SELECT case when gender = 'F' then 'Woman' else 'Man' end as gender, COUNT(*)\n"
    "FROM songplays s\n"
    "JOIN users u ON s.user_id = u.user_id\n"
    "GROUP BY 1\n"
    "ORDER BY gender DESC"
)
# Total duration of played songs per artist name. Both joins are inner joins,
# so songplays without a matching artist_id and song_id are left out.
DURATION_ARTIST = (
    " SELECT a.name, SUM(duration)\n"
    "FROM songplays s\n"
    "JOIN artists a ON s.artist_id = a.artist_id\n"
    "JOIN songs sg ON sg.song_id = s.song_id\n"
    "GROUP BY 1 "
)
# Number of songplays per `level` value, largest group first. The count column
# is aliased "status".
LEVEL_SONG = (
    " SELECT level, COUNT(*) status\n"
    "FROM songplays\n"
    "GROUP BY 1\n"
    "ORDER BY status DESC"
)
# QUERY LISTS
# Order matters: songplays declares foreign keys to the other four tables, so
# it is created last and dropped first.
create_table_queries = [
    user_table_create,
    song_table_create,
    artist_table_create,
    time_table_create,
    songplay_table_create,
]
drop_table_queries = [
    songplay_table_drop,
    user_table_drop,
    song_table_drop,
    artist_table_drop,
    time_table_drop,
]