-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwsklima_parser.py
More file actions
executable file
·317 lines (267 loc) · 10.1 KB
/
wsklima_parser.py
File metadata and controls
executable file
·317 lines (267 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import os
import datetime as dt
import numpy as np
from lxml import etree
from io import StringIO, BytesIO
"""
__author__: kmunve
"""
def parse_get_data(xml_data):
    """
    Convert the XML return from a wsklima.getData() call to a Python dictionary.

    For more info on wsklima see eklima.met.no.

    Structure of the returned Python dictionary
    A dictionary with a key for each station:
        station_dict['<station-id>']
    ...containing a dictionary with the following keys:
        ['index']: list of datetime objects of the observation times
        ['<weather-element>']: dictionary with the following keys:
            ['val']: list of values (float) at observation time
            ['q']: list of quality flags (int) corresponding to 'val'

    Usage:
        Get XML file from eklima.met.no, e.g. by using wsklima_requests.py or
        http://eklima.met.no/met/MetService?operation=getMetData
        Hand the XML file/string to parse_get_data(xml_data), e.g.
            sd = parse_get_data('54110.xml')

    :param xml_data: path of an XML file, an open file object, or the XML
        document itself as str or bytes
    :return: station_dict
    :raises TypeError: if xml_data is none of the supported input types
    """
    if hasattr(xml_data, 'read'):
        # Already an open file-like object: let lxml read from it directly.
        root = etree.parse(xml_data)
    elif isinstance(xml_data, (str, bytes)):
        if os.path.isfile(xml_data):
            # Existing file on disk; the context manager closes the handle
            # again once the document has been parsed.
            with open(xml_data, 'rb') as xml_file:
                root = etree.parse(xml_file)
        elif isinstance(xml_data, bytes):
            root = etree.parse(BytesIO(xml_data))
        else:
            root = etree.fromstring(xml_data)
    else:
        raise TypeError(
            "Please provide a string, file or file object. Got {0}".format(type(xml_data)))

    time_format = '%Y-%m-%dT%H:%M:%S.000Z'
    station_dict = {}

    # Every item-tag below a timeStamp-tag is one observation time
    for ts_item in root.xpath('//timeStamp/item'):
        obs_time = dt.datetime.strptime(ts_item.xpath('from')[0].text, time_format)

        # Every item-tag below the location-tag is one station
        for loc_item in ts_item.xpath('location/item'):
            stat_id = loc_item.xpath('id')[0].text
            # Init a new station entry on first encounter
            station = station_dict.setdefault(stat_id, {'index': []})
            station['index'].append(obs_time)

            # Every item-tag below the weatherElement-tag is one parameter
            for we_item in loc_item.xpath('weatherElement/item'):
                we_id = we_item.xpath('id')[0].text  # parameter name
                # Builtin int/float: the np.int/np.float aliases used before
                # are gone from current NumPy releases.
                we_q = int(we_item.xpath('quality')[0].text)  # quality flag
                we_val = float(we_item.xpath('value')[0].text)  # measured value

                # Init a new series if the weather element does not exist, yet
                series = station.setdefault(we_id, {'val': [], 'q': []})
                series['val'].append(we_val)
                series['q'].append(we_q)

    return station_dict
def parse_get_stations_properties(xml_data):
    """
    Convert station-property XML to a Python dictionary.

    Every ``//return/item`` element of the document is treated as one station
    and handed to _insert_stations_dict(), which stores it under the station's
    'stnr' value.

    Available properties (each must be present as a child tag of an item):
        amsl
        department
        fromDay
        fromMonth
        fromYear
        latDec
        latLonFmt
        lonDec
        municipalityNo
        name
        stnr
        toDay
        toMonth
        toYear
        utm_e
        utm_n
        utm_zone
        wmoNo

    :param xml_data: path of an XML file, an open file object, or the XML
        document itself as str or bytes
    :return: stations_dict, one entry per station
    :raises TypeError: if xml_data is none of the supported input types
    """
    if hasattr(xml_data, 'read'):
        # Already an open file-like object: let lxml read from it directly.
        root = etree.parse(xml_data)
    elif isinstance(xml_data, (str, bytes)):
        if os.path.isfile(xml_data):
            # Existing file on disk; close the handle once parsing is done.
            with open(xml_data, 'rb') as xml_file:
                root = etree.parse(xml_file)
        elif isinstance(xml_data, bytes):
            root = etree.parse(BytesIO(xml_data))
        else:
            # XML document handed over as text
            root = etree.fromstring(xml_data)
    else:
        raise TypeError(
            "Please provide a string, file or file object. Got {0}".format(type(xml_data)))

    # Tag names double as the keyword names of _insert_stations_dict()
    properties = (
        'amsl',
        'department',
        'fromDay',
        'fromMonth',
        'fromYear',
        'latDec',
        'latLonFmt',
        'lonDec',
        'municipalityNo',
        'name',
        'stnr',
        'toDay',
        'toMonth',
        'toYear',
        'utm_e',
        'utm_n',
        'utm_zone',
        'wmoNo',
    )

    stations_dict = {}
    for station in root.xpath('//return/item'):
        # Raw text of the first matching child tag for every property
        values = {}
        for prop in properties:
            values[prop] = station.xpath(prop)[0].text
        _insert_stations_dict(stations_dict, **values)

    return stations_dict
def _insert_stations_dict(stations_dict,
                          amsl,
                          department,
                          fromDay,
                          fromMonth,
                          fromYear,
                          latDec,
                          latLonFmt,
                          lonDec,
                          municipalityNo,
                          name,
                          stnr,
                          toDay,
                          toMonth,
                          toYear,
                          utm_e,
                          utm_n,
                          utm_zone,
                          wmoNo):
    """
    Store the properties of one station in stations_dict (modified in place).

    A fresh entry is created under the key ``stnr`` exactly as passed in,
    replacing any existing entry for that key. The properties are then
    converted and stored one by one: latDec/lonDec via float(), department,
    latLonFmt and name unchanged, everything else via int().

    :param stations_dict: dictionary receiving the station entry
    :return: None
    """
    entry = stations_dict[stnr] = {}

    # (key, converter, raw value) in storage order; None keeps the raw value
    fields = (
        ('amsl', int, amsl),
        ('department', None, department),
        ('fromDay', int, fromDay),
        ('fromMonth', int, fromMonth),
        ('fromYear', int, fromYear),
        ('latDec', float, latDec),
        ('latLonFmt', None, latLonFmt),
        ('lonDec', float, lonDec),
        ('municipalityNo', int, municipalityNo),
        ('name', None, name),
        ('stnr', int, stnr),
        ('toDay', int, toDay),
        ('toMonth', int, toMonth),
        ('toYear', int, toYear),
        ('utm_e', int, utm_e),
        ('utm_n', int, utm_n),
        ('utm_zone', int, utm_zone),
        ('wmoNo', int, wmoNo),
    )
    for key, convert, raw in fields:
        if convert is None:
            entry[key] = raw
        else:
            entry[key] = convert(raw)
def parse_get_elements_from_timeserie_type_station(xml_data):
    """
    Convert weather-element XML to a Python dictionary.

    Every ``//return/item`` element of the document is treated as one weather
    element and handed to _insert_elements_dict(), which stores it under the
    element's 'elemCode' value.

    Available properties (each must be present as a child tag of an item):
        - description
        - elemCode
        - elemGroup
        - elemNo
        - fromdate
        - todate
        - language
        - name
        - unit

    :param xml_data: path of an XML file, an open file object, or the XML
        document itself as str or bytes
    :return: elements_dict, one entry per weather element
    :raises TypeError: if xml_data is none of the supported input types
    """
    if hasattr(xml_data, 'read'):
        # Already an open file-like object: let lxml read from it directly.
        root = etree.parse(xml_data)
    elif isinstance(xml_data, (str, bytes)):
        if os.path.isfile(xml_data):
            # Existing file on disk; close the handle once parsing is done.
            with open(xml_data, 'rb') as xml_file:
                root = etree.parse(xml_file)
        elif isinstance(xml_data, bytes):
            root = etree.parse(BytesIO(xml_data))
        else:
            # XML document handed over as text
            root = etree.fromstring(xml_data)
    else:
        raise TypeError(
            "Please provide a string, file or file object. Got {0}".format(type(xml_data)))

    # Tag names double as the keyword names of _insert_elements_dict()
    properties = (
        'description',
        'elemCode',
        'elemGroup',
        'elemNo',
        'fromdate',
        'todate',
        'language',
        'name',
        'unit',
    )

    elements_dict = {}
    for element in root.xpath('//return/item'):
        # Raw text of the first matching child tag for every property
        values = {}
        for prop in properties:
            values[prop] = element.xpath(prop)[0].text
        _insert_elements_dict(elements_dict, **values)

    return elements_dict
def _insert_elements_dict(elements_dict,
                          description,
                          elemCode,
                          elemGroup,
                          elemNo,
                          fromdate,
                          todate,
                          language,
                          name,
                          unit):
    """
    Store the properties of one weather element in elements_dict (in place).

    A fresh entry is created under the key ``elemCode``, replacing any
    existing entry for that key. elemNo is stored as int; fromdate and todate
    are parsed with the format "%Y-%m-%dT%H:%M:%S.000Z" and stored as None
    when parsing raises TypeError (e.g. the value is None). All other
    properties are stored unchanged.

    :param elements_dict: dictionary receiving the element entry
    :return: None
    """
    def _to_datetime(stamp):
        # Only TypeError is mapped to None; a malformed string still raises
        try:
            return dt.datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%S.000Z")
        except TypeError:
            return None

    entry = elements_dict[elemCode] = {}
    entry['description'] = description
    entry['elemGroup'] = elemGroup
    entry['elemNo'] = int(elemNo)
    entry['fromdate'] = _to_datetime(fromdate)
    entry['todate'] = _to_datetime(todate)
    entry['language'] = language
    entry['name'] = name
    entry['unit'] = unit
def _test_parse_get_elements_from_timeserie_type_station():
    """Placeholder for a test of parse_get_elements_from_timeserie_type_station; does nothing yet."""
    return None
if __name__ == '__main__':
    # Demo run: parse the local file '54110.xml', dump two stations to
    # stdout and plot the 'TA' series of station 12290 against time.
    import pylab

    stations = parse_get_data('54110.xml')
    for station_id in ('54110', '12290'):
        print(stations[station_id])

    plotted = stations['12290']
    pylab.plot(plotted['index'], plotted['TA']['val'])
    pylab.show()