# int_chart_cat_db_maker.py
# Builds cat_database.db: category and series metadata scraped from FRED and
# Nasdaq Data Link, for driving an interactive chart category picker.
# (Removed: GitHub web-UI scrape residue -- page chrome and gutter line numbers
# 1..196 that were captured along with the file contents.)
"""Build cat_database.db: category and series metadata from FRED and Nasdaq Data Link.

Creates two tables:
  * categories -- three-level category tree (category / parent / grandparent),
    merged from a pre-built FRED category CSV and the free Nasdaq databases.
  * series     -- every series id/name, keyed to its category.
Categories that end up with no series are deleted again.

Merge-conflict resolution: kept the newer schema, in which ``series.id`` is
deliberately NOT a primary key (the same series id may appear under several
categories) and ``series.category_id`` is indexed for lookup.
"""
import io
import os
import zipfile
import csv
import sqlite3

import nasdaqdatalink as ndl
import pandas as pd
import requests
from full_fred.fred import Fred as Fred2

# SECURITY NOTE(review): this key was hard-coded (three times) in the original.
# Prefer the environment; fall back to the original value so behavior is kept.
NASDAQ_API_KEY = os.environ.get("NASDAQ_API_KEY", "HP2mLCTfC38KJsseJSos")

conn = sqlite3.connect(r"cat_database.db")
cursor = conn.cursor()
# cursor.execute("DROP TABLE categories")
# cursor.execute("DROP TABLE series")

# Each category row carries its parent and grandparent ids so the consumer can
# render a three-level hierarchy without recursive queries.
cursor.execute('''
CREATE TABLE categories(
category_id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
parent_id INTEGER NOT NULL,
parent_name TEXT NOT NULL,
gparent_id INTEGER NOT NULL,
database TEXT NOT NULL);
''')
# ``id`` is intentionally not unique: one series id can belong to several
# categories. Lookups are always by category, hence the index below.
cursor.execute('''
CREATE TABLE series(
category_id INTEGER NOT NULL,
id TEXT NOT NULL,
name TEXT NOT NULL,
database TEXT NOT NULL
)
''')
cursor.execute("""
CREATE INDEX category_id ON series (category_id)
""")

ndl.ApiConfig.api_key = NASDAQ_API_KEY
fred_series = Fred2("fredapikey.txt")

# The FRED API has no bulk category endpoint; use a pre-built CSV of all
# categories with their parents/grandparents.
r = requests.get(
    "https://raw.githubusercontent.com/joshuaulrich/freddy-mcfredface/master/all_fred_categories_and_parents.csv")
fred_categories_df = pd.read_csv(io.StringIO(r.text))
fred_data = csv.reader(io.StringIO(r.text))
next(fred_data)  # skip the CSV header row
cursor.executemany(
    "INSERT OR IGNORE INTO categories (category_id, name, parent_id, parent_name, gparent_id, database) "
    "VALUES(?, ?, ?, ?, ?, 'FRED')",
    fred_data)
conn.commit()

# Free (non-premium) Nasdaq Data Link databases become extra categories.
r2 = requests.get(f"https://data.nasdaq.com/api/v3/databases/?api_key={NASDAQ_API_KEY}")
nasdaq_categories_df = pd.DataFrame(r2.json()["databases"]).query("premium == False").reset_index()
# Offset Nasdaq ids so they can never collide with FRED category ids.
nasdaq_categories_df["id"] = nasdaq_categories_df["id"] + 40000
# Hand-curated grafting of each free Nasdaq database onto the FRED tree.
# NOTE(review): assumes the /databases endpoint returns exactly these 35 free
# databases in a stable order -- a length mismatch raises ValueError here.
nasdaq_categories_df["parent_name"] = ["Commodity Based", "Money, Banking, & Finance", "Academic Data",
                                      "Blockchain",
                                      "Academic Data", "Blockchain", "Commodity Based",
                                      "Money, Banking, & Finance",
                                      "Blockchain", "International Data", "Academic Data", "International Data",
                                      "Money, Banking, & Finance", "Money, Banking, & Finance",
                                      "Money, Banking, & Finance", "Interest Rates", "International Data",
                                      "International Data",
                                      "Prices", "Money, Banking, & Finance", "Prices", "Money, Banking, & Finance",
                                      "International Data", "Money, Banking, & Finance", "Academic Data",
                                      "Money, Banking, & Finance", "International Data",
                                      "Production and Business Activity",
                                      "Money, Banking, & Finance", "Consumer opinion surveys",
                                      "Money, Banking, & Finance", "Academic Data", "Institutions", "Institutions",
                                      "Money, Banking, & Finance"]
nasdaq_categories_df["parent_id"] = [33583, 32991, 33060, 50000, 33060, 50000, 33583, 32991, 50000, 32263, 33060, 32263, 32991, 32991,
                                    32991, 22, 32263, 32263, 32455, 32991, 32455, 32991, 32263, 32991, 33060, 32991, 32263, 1, 32991,
                                    33261, 32991, 33060, 32956, 32956, 32991]
# Grandparent lookup keyed by parent_id (0 = top level). Int keys -- no need to
# stringify, and no shadowing of the builtin ``id``.
_GPARENT_BY_PARENT = {33583: 31, 32991: 0, 33060: 0, 50000: 0, 32263: 0,
                      22: 32991, 32455: 0, 1: 0, 33261: 33265, 32956: 32263}
nasdaq_categories_df["gparent_id"] = [_GPARENT_BY_PARENT[pid] for pid in nasdaq_categories_df["parent_id"]]
nasdaq_categories_df[["name", "id", "parent_id", "parent_name", "gparent_id", "database_code"]].rename(
    {"id": "category_id", "database_code": "database"}, axis=1).to_sql(
    "categories", con=conn, if_exists='append', index=False)

# Pull the most popular series for every FRED category. Categories that yield
# no series are deleted; the huge per-locality trees are skipped wholesale.
for pcategory_name in pd.unique(fred_categories_df["parent_name"].values):
    print(pcategory_name)
    if pcategory_name in ("Counties", "MSAs", "Parishes"):
        print(f"Deleting {pcategory_name}")
        cursor.execute("DELETE FROM categories WHERE parent_name = ?", (pcategory_name,))
        continue
    for index, row in fred_categories_df.loc[fred_categories_df["parent_name"] == pcategory_name, :].iterrows():
        cat_dict = fred_series.get_series_in_a_category(row["id"], order_by="popularity")
        inserted = False
        # "seriess" (sic) is FRED's actual response key.
        if cat_dict is not None and len(cat_dict.get("seriess", [])) > 0 and row["name"]:
            df = pd.DataFrame(cat_dict["seriess"])
            if not df.empty:
                cursor.executemany(
                    "INSERT OR IGNORE INTO series (id, name, category_id, database) VALUES (?, ?, ?, 'FRED')",
                    [(sid, title, int(row["id"])) for sid, title in df[["id", "title"]].values.tolist()])
                inserted = True
        if not inserted:
            cursor.execute("DELETE FROM categories WHERE category_id = ?", (int(row["id"]),))

# Each Nasdaq database ships its full series list as a zipped CSV.
for index, row in nasdaq_categories_df.iterrows():
    print(row['name'], row['parent_name'])
    try:
        zipdata = zipfile.ZipFile(io.BytesIO(requests.get(
            f"https://data.nasdaq.com/api/v3/databases/{row['database_code']}/metadata?api_key={NASDAQ_API_KEY}").content))
        df = pd.read_csv(zipdata.open(zipdata.namelist()[0]))
        # FRED is already covered above; LBMA is excluded deliberately.
        if row["name"] not in ("Federal Reserve Economic Data", "London Bullion Market Association") and not df.empty:
            cursor.executemany(
                "INSERT OR IGNORE INTO series (id, name, category_id, database) VALUES (?, ?, ?, ?)",
                [(code, sname, int(row["id"]), row["database_code"])
                 for code, sname in df[["code", "name"]].values.tolist()])
        else:
            cursor.execute("DELETE FROM categories WHERE category_id = ?", (int(row["id"]),))
    except zipfile.BadZipFile:
        # Some databases answer with an error page instead of a zip; drop them.
        cursor.execute("DELETE FROM categories WHERE category_id = ?", (int(row["id"]),))

conn.commit()
for row in cursor.execute("SELECT * FROM series LIMIT 15").fetchall():
    print(row)