-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspatial_join_areas.py
More file actions
245 lines (184 loc) · 12.1 KB
/
spatial_join_areas.py
File metadata and controls
245 lines (184 loc) · 12.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import psycopg2
from psycopg2 import sql
def create_database_connection(db_params):
"""
Establish database connection to postgres
"""
connection = psycopg2.connect("dbname=%s user=%s port=%s host=%s password=%s" %(db_params['database_name'], db_params['user'], db_params['port'], db_params['host'], db_params['password']))
return connection
def check_data_exists(database_connection, data_name):
"""
Check the named dataset exists. Returns True if exists, False if not.
"""
# create cursor to access database
cursor = database_connection.cursor()
# run query to see if dataset exists
cursor.execute(sql.SQL('SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_name = {});').format(sql.Literal(data_name)))
res = cursor.fetchall()[0][0]
cursor.close()
return res
def check_fields_to_join(fields_to_join):
"""Check which fields have been passed to be joined
"""
if 'oa' in fields_to_join: oa = True
else: oa = False
if 'lad' in fields_to_join: lad = True
else: lad = False
if 'gor' in fields_to_join: gor = True
else: gor = False
return oa, lad, gor
def get_srid(database_connection, table, geom_field):
"""Get the SRID of a dataset
"""
# create cursor to access database
cursor = database_connection.cursor()
# run query to get srid
cursor.execute(sql.SQL('SELECT srid FROM public.geometry_columns WHERE f_table_name={};').format(sql.Literal(table)))
# fetch query result
res = cursor.fetchall()
# if more than one row returned, return an error
if len(res) > 1:
cursor.execute(sql.SQL('SELECT srid FROM public.geometry_columns WHERE f_table_name={} and f_geometry_column={};').format(sql.Literal(table), sql.Literal(geom_field)))
# fetch query result
res = cursor.fetchall()
if len(res) > 1:
return 'More than one table with the same name: %s' % res
# convert returned srid into an integer
srid = int(res[0][0])
# close cursor
cursor.close()
# return
return srid
def check_the_srid_of_the_data(database_connection, dataset_name, dataset_geom_field, join_dataset, join_dataset_geom_field):
"""Check the SRID of both geometries is the same
"""
# get srid of dataset
srid_data = get_srid(database_connection, dataset_name, dataset_geom_field)
# check the returned srid is not an error string
if isinstance(srid_data, str):
return False, srid_data
# get srid of areas to join
srid_join_areas = get_srid(database_connection, join_dataset, join_dataset_geom_field)
# check the returned srid is not an error string
if isinstance(srid_data, str):
return False, srid_data
# compare srid's - return an error if they are not equal
if srid_data != srid_join_areas:
return False, 'The SRIDs for the two datasets do not match - they are required to.'
return True
def main(database_connection=None, connection_parameters=None, dataset='', join_multiple_areas=True, areas_to_join='', join_data_is_areas=True, fields_to_join=[], dataset_id_field='gid', join_areas_id_field='gid', dataset_geom_field='geom', join_areas_geom_field='geom'):
"""
Run the spatial join over two tables existing in a postgres database.
fields_to_join:
- [{name:xxx, type:xxx},{name:yyy, type:yyy}]
"""
# check if a connection has been passed
if database_connection is None:
# if no connection, check database parameters has been passed
if connection_parameters is None:
# return an error to the user if parameters haven't been passed either
return False, 'No database connection passed or database connection parameters. At least one should be sent.'
# create a database connection using the passed parameters
database_connection = create_database_connection(connection_parameters)
# allow all calls to be committed when they are run
database_connection.autocommit = True
# check the two datasets exist
exists = check_data_exists(database_connection, dataset)
if not exists:
return "Error! Could not fine the dataset %s" % dataset
exists = check_data_exists(database_connection, areas_to_join)
if not exists:
return "Error! Could not fine the join dataset %s" % areas_to_join
# check the two datasets have matching srid's
matching_srids = check_the_srid_of_the_data(database_connection, dataset, dataset_geom_field, areas_to_join, join_areas_geom_field)
# if false, return an error to the user
if matching_srids is not True:
# get the length of the matching_srids - if greater than 1 it includes an error string
if len(matching_srids) > 1:
# An problem has been found when checking the SRIDs of the datasets
return 'Error!. %s' % matching_srids[1]
else:
# a generic error has occurred
return 'The two datasets have different SRIDs. Please correct this and try again'
# check to see what fields have been passed
oa, lad, gor = check_fields_to_join(fields_to_join)
# create cursor to access database
cursor = database_connection.cursor()
temp_table = '___temp'
# delete temp table if it exists
cursor.execute(sql.SQL("DROP TABLE IF EXISTS {};").format(sql.Identifier(temp_table)))
if join_multiple_areas:
# run join
if join_data_is_areas:
if lad is True and gor is True and oa is False:
cursor.execute(sql.SQL('SELECT b.{0} as gid, ARRAY_AGG(st_area(st_intersection(b.{1}, t.{2})) / st_area(b.{3})) as coverage, ARRAY_AGG(distinct t.geo_code) as lads, ARRAY_AGG(distinct t.geo_code_gor) as gors INTO {9} FROM ftables.{4} t, {5} b WHERE st_intersects(b.{6}, t.{7}) GROUP BY b.{8};').format(
sql.Identifier(dataset_id_field), # 0
sql.Identifier(dataset_geom_field), # 1
sql.Identifier(join_areas_geom_field), # 2
sql.Identifier(dataset_geom_field), # 3
sql.Identifier(areas_to_join), # 4
sql.Identifier(dataset), # 5
sql.Identifier(dataset_geom_field), # 6
sql.Identifier(join_areas_geom_field), # 7
sql.Identifier(dataset_id_field), # 8
sql.Identifier(temp_table)), # 9
[])
elif lad is True and gor is True and oa is True:
cursor.execute(sql.SQL('SELECT b.{0} as gid, ARRAY_AGG(st_area(st_intersection(b.{1}, t.{2})) / st_area(b.{3})) as coverage, ARRAY_AGG(distinct t.oa_code) as oas, ARRAY_AGG(distinct t.geo_code) as lads, ARRAY_AGG(distinct t.geo_code_gor) as gors INTO {9} FROM ftables.{4} t, {5} b WHERE st_intersects(b.{6}, t.{7}) GROUP BY b.{8};').format(
sql.Identifier(dataset_id_field), # 0
sql.Identifier(dataset_geom_field), # 1
sql.Identifier(join_areas_geom_field), # 2
sql.Identifier(dataset_geom_field), # 3
sql.Identifier(areas_to_join), # 4
sql.Identifier(dataset), # 5
sql.Identifier(dataset_geom_field), # 6
sql.Identifier(join_areas_geom_field), # 7
sql.Identifier(dataset_id_field), # 8
sql.Identifier(temp_table)), # 9
[])
else:
#cursor.execute(sql.SQL('SELECT b.{0} as gid INTO {2} FROM {1} b;').format(sql.Identifier('gid'), sql.Identifier(dataset), sql.Identifier('_temp')))
if lad is True and gor is True and oa is False:
cursor.execute(sql.SQL('SELECT b.{0} as gid, ARRAY_AGG(distinct t.geo_code) as lads, ARRAY_AGG(distinct t.geo_code_gor) as gors INTO {5} FROM ftables.{1} t, {2} b WHERE st_intersects(b.{3}, t.{4} GROUP BY b.{6};').format(sql.Identifier(dataset_id_field), sql.Identifier(areas_to_join), sql.Identifier(dataset), sql.Identifier(dataset_geom_field), sql.Identifier(join_areas_geom_field), sql.Identifier(temp_table), sql.Identifier(dataset_id_field)))
elif lad is True and gor is True and oa is True:
cursor.execute(sql.SQL('SELECT b.{0} as gid, ARRAY_AGG(distinct t.oa_code) as oas, ARRAY_AGG(distinct t.geo_code) as lads, ARRAY_AGG(distinct t.geo_code_gor) as gors INTO {5} FROM ftables.{1} t, {2} b WHERE st_intersects(b.{3}, t.{4} GROUP BY b.{6};').format(sql.Identifier(dataset_id_field), sql.Identifier(areas_to_join), sql.Identifier(dataset), sql.Identifier(dataset_geom_field), sql.Identifier(join_areas_geom_field), sql.Identifier(temp_table), sql.Identifier(dataset_id_field)))
# create fields in dataset to join areas to
#for field in fields_to_join:
cursor.execute(sql.SQL('ALTER TABLE {} ADD IF NOT EXISTS lads character varying[];').format(sql.Identifier(dataset)))
cursor.execute(sql.SQL('ALTER TABLE {} ADD IF NOT EXISTS gors character varying[];').format(sql.Identifier(dataset)))
# add field for coverage values if join involves areas
if join_data_is_areas:
cursor.execute(sql.SQL('ALTER TABLE {} ADD IF NOT EXISTS lad_coverage float[];').format(sql.Identifier(dataset)))
# create index on the ids to speed up the update
#temp = temp_table+'_gid_idx'
cursor.execute(sql.SQL('CREATE INDEX {0} ON {1} USING btree (gid);').format(sql.Identifier(temp_table+'_gid_idx'), sql.Identifier(temp_table)), [])
cursor.execute(sql.SQL('CREATE INDEX IF NOT EXISTS {0} ON {1} USING btree (gid);').format(sql.Identifier(dataset+'_gid_idx'), sql.Identifier(dataset)), [])
# run the update
cursor.execute(sql.SQL('UPDATE {0} as a SET lads = b.lads, gors = b.gors FROM {1} b WHERE a.{2} = b.gid;').format(sql.Identifier(dataset), sql.Identifier(temp_table), sql.Identifier(dataset_id_field)))
# remote the temp tables
#cursor.execute(sql.SQL('DROP INDEX IF EXISTS {0};').format(sql.Identifier(temp_table+'_gid_idx')), [])
#cursor.execute(sql.SQL('DROP TABLE IF EXISTS {0};').format(sql.Identifier(temp_table)))
# create index on new fields which are now populated
cursor.execute(sql.SQL('CREATE INDEX IF NOT EXISTS {0} ON {1} USING gin(lads);').format(sql.Identifier(dataset+'_lads_idx'), sql.Identifier(dataset)), [])
cursor.execute(sql.SQL('CREATE INDEX IF NOT EXISTS {0} ON {1} USING gin(gors);').format(sql.Identifier(dataset+'_gor_idx'), sql.Identifier(dataset)), [])
else:
# if join result is to find a single area rather than multiple
# create fields in dataset to join areas to
# for field in fields_to_join:
if oa:
cursor.execute(sql.SQL('ALTER TABLE {} ADD IF NOT EXISTS oa character varying;').format(sql.Identifier(dataset)))
if lad:
cursor.execute(sql.SQL('ALTER TABLE {} ADD IF NOT EXISTS lad character varying;').format(sql.Identifier(dataset)))
if gor:
cursor.execute(sql.SQL('ALTER TABLE {} ADD IF NOT EXISTS gor character varying;').format(sql.Identifier(dataset)))
# run spatial join
if lad is True and gor is True and oa is False:
cursor.execute(sql.SQL('UPDATE {0} a SET lad = b.lad_code, gor = b.gor_code FROM ftables.{1} b WHERE ST_Intersects(b.{2}, a.geom);').format(sql.Identifier(dataset), sql.Identifier(areas_to_join), sql.Identifier(dataset_geom_field)))
elif lad is True and gor is True and oa is True:
cursor.execute(sql.SQL('UPDATE {0} a SET oa = b.oa_code, lad = b.lad_code, gor = b.gor_code FROM ftables.{1} b WHERE ST_Intersects(b.{2}, a.geom);').format(sql.Identifier(dataset), sql.Identifier(areas_to_join), sql.Identifier(dataset_geom_field)))
# delete temp table if it exists
cursor.execute(sql.SQL("DROP TABLE IF EXISTS {};").format(sql.Identifier(temp_table)))
# close connection an cursor
cursor.close()
database_connection.close()
return True