|
| 1 | +'''Using a DuckDB database to get ONS Geography data''' |
| 2 | + |
| 3 | +import os |
| 4 | + |
| 5 | +import duckdb |
| 6 | + |
def get_ons_from_postcodes(postcodes):
    '''Get the ONS Geography data for a collection of postcodes.

    Space characters are stripped from every postcode before it is matched
    against the ``postcode`` column of the ``vw_postcodes`` view.

    Args:
        postcodes: Iterable of postcode strings.

    Returns:
        A list with one dictionary per matching row, keyed by column name
        (the ``longitude``, ``latitude`` and ``geometry`` columns are left
        out), or ``None`` when no postcodes were supplied or none matched.
    '''

    # For all postcodes supplied, remove any spaces
    cleaned = [postcode.replace(' ', '') for postcode in postcodes]
    if not cleaned:
        # Nothing to look up; skip the database rather than build an
        # empty IN () clause.
        return None

    # The path is either data/ or /data/ depending on the environment
    db = '/data/ons_postcodes.duckdb'
    if not os.path.exists(db):
        db = 'data/ons_postcodes.duckdb'

    # One '?' placeholder per postcode: the values are bound by DuckDB and
    # never spliced into the SQL text, so quotes in the input are harmless.
    placeholders = ','.join('?' for _ in cleaned)
    sql = f'select * from vw_postcodes where postcode IN ({placeholders})'

    conn = duckdb.connect(database=db, read_only=True)
    try:
        query = conn.execute(sql, cleaned)
        column_names = [column[0] for column in query.description]
        rows = query.fetchall()
    finally:
        # Release the database handle even when the query fails.
        conn.close()

    if not rows:
        return None

    # Convert each row to a dictionary using the column names, dropping the
    # coordinate/geometry columns.
    excluded = {'longitude', 'latitude', 'geometry'}
    return [
        {
            name: value
            for name, value in zip(column_names, row)
            if name not in excluded
        }
        for row in rows
    ]
0 commit comments