-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathget_usbr_webdata.py
More file actions
executable file
·206 lines (181 loc) · 5.78 KB
/
get_usbr_webdata.py
File metadata and controls
executable file
·206 lines (181 loc) · 5.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/local/bin/python
import datetime, os, re, sys, time, requests
from datetime import timedelta
from requests.packages.urllib3.exceptions import InsecureRequestWarning
helpStr = '''
get_usbr_webdata v1.4.0
12 December 2016
POC: gunnar.a.leffler@usace.army.mil
USAGE:
get_usbr_webdata {daily|realtime} <lookback window> <stationlist>
EXAMPLES:
get last 2 days hours of realtime data:
get_usbr_webdata realtime 24h stations.realtime.list
get_usbr_webdata realtime 2 stations.list
get last weeks worth of daily data:
get_usbr_webdata daily 1w stations.daily.list
get_usbr_webdata daily 7 stations.list
'''
service = { "daily":"webarccsv.pl",
"realtime":"instant.pl",
"realtime2":"webdaycsv.pl",
}
nodataset = { "MISSING" : 0,
"NO RECORD" : 0,
"" : 0
}
debug = False
def div1000( s ):
output = ""
try:
output = str( float( s ) / 1000 )
except:
pass
return output
def help ():
print helpStr
def readAliasFile( path ): #reads an alias file and returns a dictionary
csv = readTSV( path )
alias = []
for line in csv:
alias.append( ( line.pop( 0 ), line ) )
return dict( alias )
def readTSV( path ):
lines = ( line.rstrip( '\n' ) for line in open( path, "r" ) )
output = []
for s in lines:
if len(s) > 1 and s[0] != '#': # ignore blank lines
row1 = s.split( '\t' )
output.append( row1 )
return output
def TD (input):
'''TD takes a relative time and turns it into a timedelta
input format: 1w7d6h9m'''
input = input.lower()
output = datetime.timedelta(seconds = 0)
t = ""
try:
for c in input:
if c =="w":
output += datetime.timedelta(weeks=float(t))
t = ""
elif c =="Y":
output += datetime.timedelta(days=float(t)*365)
t = ""
elif c =="d":
output += datetime.timedelta(days=float(t))
t = ""
elif c =="h":
output += datetime.timedelta(hours=float(t))
t = ""
elif c =="m":
output += datetime.timedelta(minutes=float(t))
t = ""
else:
if c != " ":
t += c
if output.total_seconds() == 0: #defaulting to days
output += datetime.timedelta(days=float(t))
except:
status = "Could not parse"+input+"3 into a time interval, defaulting to 3 days"
output = datetime.timedelta(days=7)
return output
# Removes cruft from scraped input
def stripGarbage( input ):
output = ""
if input[0] == "-":
output = "-"
for c in input:
if c.isdigit() or c == ".":
output += c
return output
def processInput( type, buffer ):
format = { "daily":"%m/%d/%Y",
"realtime":"%m/%d/%Y %H:%M" }
lines = buffer.split( '\n' )
flag = 0
output = []
errline = ""
for s in lines:
s = s.strip()
if "END DATA" in s:
flag = 0
if len(s) > 1 and flag > 1: #if line not blank or header / footer
try:
tokens = s.split(',')
tokens[1] = tokens[1].strip()
tokens[1].replace("Edited","") #USBR's new webservice appends "Edited" to QCd data
if tokens[1] not in nodataset:
output.append([datetime.datetime.strptime(tokens[0], format[type]),tokens[1]])
else :
if debug == True: print >> sys.stderr, errline+"\t"+s
except:
pass
if "BEGIN DATA" in s:
flag = 1
if "DATE" in s or "DATE TIME" in s and flag == 1:
errline = s
flag += 1
return output
def makeSHEF( type, locID, timeObj, tz, PEcode, value ):
output = ".A " + locID + " " + timeObj.strftime( "%Y%m%d" ) + " " + tz
if type == "daily":
output += " DH24/"
elif type == "realtime":
output += " DH" + timeObj.strftime( "%H%M" ) + "/DUE /"
output += PEcode + " " + value
return output
def populateURL( type, location, pecode, lookback ):
et = datetime.datetime.now()
#st = et - timedelta( days = int( lookback ) )
st = et - TD( lookback )
url = ( "https://www.usbr.gov/pn-bin/%s?parameter=%s%%20%s&syer=%s&"
"smnth=%s&sdy=%s&eyer=%s&emnth=%s&edy=%s" % ( service[type],
location,
pecode,
st.strftime( "%Y" ),
st.strftime( "%m" ),
st.strftime( "%d" ),
et.strftime( "%Y" ),
et.strftime( "%m" ),
et.strftime( "%d" ) ) )
if debug == True: print >> sys.stderr, url
return url
def getData( type, lookback, station_file ):
alias = readAliasFile( type + ".alias" )
for line in readTSV( station_file ):
try:
station, param, tz = line[0:3]
except:
station, param = line[0:2]
tz = "P"
if len(tz) != 1: tz = "P"
if param in alias:
pe, dtsep = alias[param][0:2]
url = populateURL( type, station, param, lookback )
input = processInput( type, requests.get( url, verify = False ).text )
if len (input) < 1 and type == "realtime":
url =url.replace(service["realtime"],service["realtime2"])
if debug == True:
print >> sys.stderr, "No data found trying alternate service:\n"+url
input = processInput( type, requests.get( url, verify = False ).text )
for n in input:
timestamp, value = n[0:2]
value = stripGarbage( value )
# SHEFIT -2 can't handle large numbers, so convert LS from af to kaf
if pe == "LS":
value = div1000( value )
print makeSHEF( type, station, timestamp, tz, pe + dtsep, value )
###############################################################################
# Entry Point
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
if len( sys.argv ) > 4:
if sys.argv[4] == "debug": debug = True
if len( sys.argv ) > 3:
type, lookback, station_file = sys.argv[1:4]
if re.match( r'(daily|realtime)', type ):
getData( type, lookback, station_file )
else:
help()
else:
help()