Skip to content

Commit e95eb06

Browse files
authored
Merge pull request #3 from CenterForSpatialResearch/01-layout-rework-anthromes
01 layout rework anthromes
2 parents 4e3fbfc + 325bc3f commit e95eb06

278 files changed

Lines changed: 565431 additions & 5189 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@ node_modules/
1111
# Processing intermediates
1212
processing/geojson/
1313
processing/unoptimized_backup/
14+
processing/utilities/
1415

1516
#working docs
1617
.claude/
1718
PROJECT_PHASES.MD
1819

19-
archive/
20+
archive/
21+
22+
# Utilities
23+
utilities/

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ twosides/
3434

3535
## Commands
3636

37-
```bash
37+
```powershell
3838
# Install dependencies
3939
npm install
4040

processing/1_extract_geojson.py

Lines changed: 153 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
--skip-dissolve Keep individual cell polygons (no merging by anthrome code);
2121
best if you need per-cell cartogram-style outputs; larger files.
2222
--profile Folder name under processing/geojson for outputs.
23+
--boundaries Path to Natural Earth shapefile for country lookup (optional).
24+
When provided, adds 'country' (ISO3) and 'cellId' to each feature.
2325
"""
2426

2527
import os
@@ -34,14 +36,21 @@
3436
from rasterio.transform import from_origin
3537
from rasterio.warp import reproject
3638
from rasterio.crs import CRS
37-
from shapely.geometry import shape, mapping
39+
from shapely.geometry import shape, mapping, Point, box
3840
from shapely.geometry.polygon import orient
3941
from shapely.ops import unary_union
42+
from shapely import STRtree
43+
import shapefile # pyshp
4044
import warnings
4145

4246
# Suppress rasterio warnings
4347
warnings.filterwarnings('ignore', category=rasterio.errors.NotGeoreferencedWarning)
4448

49+
# Global for country lookup (loaded once)
50+
_country_index = None
51+
_country_geoms = None
52+
_country_codes = None
53+
4554
def parse_args():
4655
parser = argparse.ArgumentParser(description='Extract/dissolve anthrome GeoJSON from GeoTIFFs')
4756
parser.add_argument('--input-dir', type=Path, default=Path('../data/HYDE-3.5/baseline/anthromes_geotiff'),
@@ -58,8 +67,82 @@ def parse_args():
5867
help='Minimum object size (in pixels) to retain during sieve (0 to skip)')
5968
parser.add_argument('--skip-dissolve', action='store_true',
6069
help='Keep individual cell polygons (no dissolve by anthrome code)')
70+
parser.add_argument('--boundaries', type=Path, default=None,
71+
help='Path to Natural Earth shapefile for country lookup (adds cellId and country)')
6172
return parser.parse_args()
6273

74+
75+
def load_country_boundaries(shp_path):
76+
"""
77+
Load Natural Earth shapefile and build spatial index for country lookup.
78+
Returns (STRtree index, list of geometries, list of ISO3 codes).
79+
"""
80+
global _country_index, _country_geoms, _country_codes
81+
82+
if _country_index is not None:
83+
return _country_index, _country_geoms, _country_codes
84+
85+
print(f" Loading country boundaries from {shp_path}...")
86+
sf = shapefile.Reader(str(shp_path))
87+
fields = [f[0] for f in sf.fields[1:]]
88+
89+
# Find ISO_A3 field
90+
iso_idx = fields.index('ISO_A3') if 'ISO_A3' in fields else None
91+
if iso_idx is None:
92+
print(" Warning: ISO_A3 field not found, country lookup disabled")
93+
return None, None, None
94+
95+
geoms = []
96+
codes = []
97+
98+
for rec in sf.shapeRecords():
99+
geom = shape(rec.shape.__geo_interface__)
100+
iso3 = rec.record[iso_idx] if iso_idx is not None else None
101+
if geom.is_valid and iso3:
102+
geoms.append(geom)
103+
codes.append(iso3)
104+
105+
_country_geoms = geoms
106+
_country_codes = codes
107+
_country_index = STRtree(geoms)
108+
109+
print(f" Loaded {len(geoms)} country polygons")
110+
return _country_index, _country_geoms, _country_codes
111+
112+
113+
def compute_cell_id(centroid, transform, num_cols):
114+
"""
115+
Compute deterministic cell ID from centroid coordinates and grid transform.
116+
cellId = row * num_cols + col
117+
"""
118+
origin_x = transform.c # left edge
119+
origin_y = transform.f # top edge
120+
pixel_width = transform.a
121+
pixel_height = -transform.e # negative because y decreases downward
122+
123+
col = int((centroid.x - origin_x) / pixel_width)
124+
row = int((origin_y - centroid.y) / pixel_height)
125+
126+
return row * num_cols + col
127+
128+
129+
def lookup_country(centroid, index, geoms, codes):
130+
"""
131+
Look up country ISO3 code for a point using spatial index.
132+
Returns ISO3 code or None if not found.
133+
"""
134+
if index is None:
135+
return None
136+
137+
# Query spatial index for candidates
138+
candidates = index.query(centroid)
139+
140+
for i in candidates:
141+
if geoms[i].contains(centroid):
142+
return codes[i]
143+
144+
return None
145+
63146
def orient_geometry(geom, sign=-1.0):
64147
"""
65148
Safely orient Polygon/MultiPolygon (and collections) without assuming .exterior exists.
@@ -101,7 +184,8 @@ def resample_categorical(src, target_res):
101184

102185
return destination, dst_transform
103186

104-
def extract_geojson_from_tif(tif_path, output_path, target_res, simplify_tol, sieve_size, skip_dissolve):
187+
def extract_geojson_from_tif(tif_path, output_path, target_res, simplify_tol, sieve_size, skip_dissolve,
188+
country_index=None, country_geoms=None, country_codes=None):
105189
"""
106190
Extract GeoJSON polygons from a single GeoTIFF file with optimization.
107191
@@ -111,13 +195,19 @@ def extract_geojson_from_tif(tif_path, output_path, target_res, simplify_tol, si
111195
Args:
112196
tif_path: Path to input GeoTIFF file
113197
output_path: Path to output GeoJSON file
198+
country_index: Optional STRtree for country lookup
199+
country_geoms: Optional list of country geometries
200+
country_codes: Optional list of ISO3 codes
114201
"""
115202
from collections import defaultdict
116203

117204
with rasterio.open(tif_path) as src:
118205
# Read raster data (optionally resample first)
119206
image, transform = resample_categorical(src, target_res)
120207

208+
# Compute grid dimensions for cellId calculation
209+
num_rows, num_cols = image.shape
210+
121211
# Drop very small slivers before polygonization (optional)
122212
if sieve_size and sieve_size > 0:
123213
image = sieve(image, size=sieve_size, connectivity=8)
@@ -128,27 +218,50 @@ def extract_geojson_from_tif(tif_path, output_path, target_res, simplify_tol, si
128218
features = []
129219

130220
if skip_dissolve:
131-
# Keep individual cell polygons (optionally simplify each cell)
132-
for geom, value in shapes(image, transform=transform):
133-
anthrome_code = int(value)
134-
135-
if anthrome_code == nodata_value:
136-
continue
137-
138-
geom_shape = shape(geom)
139-
if simplify_tol and simplify_tol > 0:
140-
geom_shape = geom_shape.simplify(simplify_tol, preserve_topology=True)
141-
142-
# Normalize winding so D3 interprets polygons, not complements
143-
geom_shape = orient_geometry(geom_shape, sign=-1.0)
144-
145-
features.append({
146-
'type': 'Feature',
147-
'geometry': mapping(geom_shape),
148-
'properties': {
149-
'anthrome': anthrome_code
150-
}
151-
})
221+
# Create individual cell polygons by iterating through the grid
222+
# This guarantees each cell is its own polygon (no merging of same-value neighbors)
223+
origin_x = transform.c # left edge
224+
origin_y = transform.f # top edge
225+
pixel_width = transform.a
226+
pixel_height = -transform.e # positive value (y decreases downward)
227+
228+
for row in range(num_rows):
229+
for col in range(num_cols):
230+
anthrome_code = int(image[row, col])
231+
232+
if anthrome_code == nodata_value:
233+
continue
234+
235+
# Compute cell bounds
236+
min_x = origin_x + col * pixel_width
237+
max_x = min_x + pixel_width
238+
max_y = origin_y - row * pixel_height
239+
min_y = max_y - pixel_height
240+
241+
# Create box polygon for this cell
242+
geom_shape = box(min_x, min_y, max_x, max_y)
243+
244+
# Normalize winding so D3 interprets polygons, not complements
245+
geom_shape = orient_geometry(geom_shape, sign=-1.0)
246+
247+
# Compute cellId
248+
cell_id = row * num_cols + col
249+
250+
# Build properties (shortened names for file size reduction)
251+
props = {'a': anthrome_code, 'i': cell_id}
252+
253+
# Add country if boundary data available
254+
if country_index is not None:
255+
centroid_x = (min_x + max_x) / 2
256+
centroid_y = (min_y + max_y) / 2
257+
country = lookup_country(Point(centroid_x, centroid_y), country_index, country_geoms, country_codes)
258+
props['c'] = country # may be None for ocean cells
259+
260+
features.append({
261+
'type': 'Feature',
262+
'geometry': mapping(geom_shape),
263+
'properties': props
264+
})
152265
else:
153266
# Group geometries by anthrome value
154267
grouped = defaultdict(list)
@@ -180,7 +293,7 @@ def extract_geojson_from_tif(tif_path, output_path, target_res, simplify_tol, si
180293
'type': 'Feature',
181294
'geometry': mapping(simplified),
182295
'properties': {
183-
'anthrome': anthrome_code
296+
'a': anthrome_code
184297
}
185298
})
186299

@@ -224,6 +337,19 @@ def main():
224337
print(" Simplify: off")
225338
if args.sieve_size and args.sieve_size > 0:
226339
print(f" Sieve: drop components smaller than {args.sieve_size} px")
340+
341+
# Load country boundaries if provided
342+
country_index, country_geoms, country_codes = None, None, None
343+
if args.boundaries:
344+
if args.boundaries.exists():
345+
country_index, country_geoms, country_codes = load_country_boundaries(args.boundaries)
346+
if country_index:
347+
print(f" Country lookup: enabled")
348+
else:
349+
print(f" Country lookup: failed to load")
350+
else:
351+
print(f" ⚠️ Boundaries file not found: {args.boundaries}")
352+
227353
print(f" Output: {output_dir}\n")
228354

229355
total_features = 0
@@ -241,6 +367,9 @@ def main():
241367
simplify_tol=args.simplify,
242368
sieve_size=args.sieve_size,
243369
skip_dissolve=args.skip_dissolve,
370+
country_index=country_index,
371+
country_geoms=country_geoms,
372+
country_codes=country_codes,
244373
)
245374
total_features += feature_count
246375

0 commit comments

Comments
 (0)