-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathwiesbaden.py
More file actions
234 lines (156 loc) · 7.46 KB
/
wiesbaden.py
File metadata and controls
234 lines (156 loc) · 7.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import requests;
import pandas as pd;
import csv;
class Genesis:
# dictionary with all sites that uses the genesis system
databases = {
"REGIO":"https://www.regionalstatistik.de/genesisws/"
,"DESTATIS":"https://www-genesis.destatis.de/genesisWS/"
}
def __init__(self, database, username, password):
# assign variales
self.__auth = "username={0}&password={1}".format(username, password)
self.database = database
# check if database exists
if database not in Genesis.databases:
raise ValueError("database is not availables")
# check userdata
## specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/helloworld/logincheck?"
additionalParameters = "&language=de"
## generate request uri
uri = databaseLink + method + self.__auth + additionalParameters
## query data
response = requests.get(uri)
## check result
if 'Fehler' in response.json()["Status"]:
raise ValueError("the username and/or password is incorrect")
# set pandas print options (the whole text have to be visible)
pd.set_option("display.max_colwidth", None)
def search_datacube(self, term):
"""search_datalist searches cubes with terms"""
# specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/find/find?"
searchObject = "&term=" + term
additionalParameters = "&category=cubes&pagelength=2500"
# generate request uri
uri = databaseLink + method + self.__auth + searchObject + additionalParameters
# query data
response = requests.get(uri)
# convert to data frame
df = pd.DataFrame(response.json()["Cubes"])
# return data frame
return df
def retrieve_datalist(self, tableseries):
"""retrieve_datalist retrieves a list of available data tables in a series"""
# specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/catalogue/cubes?"
searchObject = "&selection=" + tableseries
additionalParameters = "&pagelength=2500"
# generate request uri
uri = databaseLink + method + self.__auth + searchObject + additionalParameters
# query data
response = requests.get(uri)
# convert to data frame
df = pd.DataFrame(response.json()["List"])
# return data frame
return df
def retrieve_valuelabel(self, variablename):
"""retrieve_valuelabel retrieves value labels for variable"""
# specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/catalogue/values2variable?"
searchObject = "&name=" + variablename
additionalParameters = "&pagelength=25000"
# generate request uri
uri = databaseLink + method + self.__auth + searchObject + additionalParameters
# query data
response = requests.get(uri)
# convert to data frame
df = pd.DataFrame(response.json()["List"])
# return data frame
return df
def retrieve_metadata(self, tableseries):
"""retrieve_metadata retrieves meta data"""
# specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/metadata/cube?"
searchObject = "&name=" + tableseries
additionalParameters = "&pagelength=2500"
# generate request uri
uri = databaseLink + method + self.__auth + searchObject + additionalParameters
# query data
response = requests.get(uri)
# extract dimension and save result in list objects
objects = response.json()["Object"]["Structure"]["Axis"]
# extract measures and extend list objects
objects.extend(response.json()["Object"]["Structure"]["Contents"])
# convert objects to data frame
df = pd.DataFrame(objects)
# return data frame
return df
def retrieve_data(self, tableseries):
"""retrieve_data retrieves a single data cube"""
# ===============================
# Part 1 - retrieve column names
# ===============================
# specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/metadata/cube?"
searchObject = "&name=" + tableseries
additionalParameters = "&pagelength=2500"
# generate request uri
uri = databaseLink + method + self.__auth + searchObject + additionalParameters
# query metadata
response = requests.get(uri)
# list with all column names
columns = []
# list with numeric columns --> for type conversion needed
numericColumns = []
# first column = dummy column --> set cube id as name
columns.append('id'+response.json()["Object"]["Statistic"]["Code"])
# extract dimension column names and extend column list
for axis_object in response.json()["Object"]["Structure"]["Axis"]:
columns.append(axis_object["Code"])
# extract measure names and extend column list
for conent_object in response.json()["Object"]["Structure"]["Contents"]:
# the data output contains 4 columns per measure --> column _val is relevant
for var in ["_val","_qual","_lock","_err"]:
columns.append(conent_object["Code"]+var)
if var == "_val":
numericColumns.append(conent_object["Code"]+var)
# ===============================
# Part 2 - retrieve data values
# ===============================
# specifcy request components
databaseLink = Genesis.databases[self.database]
method = "rest/2020/data/cube?"
searchObject = "&name=" + tableseries
# generate request uri
uri = databaseLink + method + self.__auth + searchObject
# query metadata
response = requests.get(uri)
# convert content in nested lists
str = response.json()["Object"]["Content"]
content = list(csv.reader(response.json()["Object"]["Content"].splitlines(),delimiter=';'))
# delete header rows with metadata
# the last header row contains the word 'QEI'
firstRowData = 0
for row in content:
firstRowData+=1
if 'QEI' in row:
break
del content[:firstRowData]
# ===============================
# Part 3 - generate data frame
# ===============================
# convert objects to data frame and return result
df = pd.DataFrame(content, columns=columns)
# convert numeric columns to appropriate data type
for col in numericColumns:
df[col]=pd.to_numeric(df[col])
# return result
return df