Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@
**Action:** Use `db.query(Model.col1, Model.col2)` for read-heavy list endpoints and spatial candidate searches. Note that projected results are immutable `Row` objects, so use `db.query(Model).filter(...).update()` for atomic modifications.

## 2026-02-06 - Spatial Query Optimization
**Learning:** For small distances (e.g., < 1km), the Haversine formula is computationally expensive due to multiple trigonometric calls. An equirectangular approximation (Euclidean distance on scaled lat/lon) is ~4x faster and sufficiently accurate.
**Action:** Use `equirectangular_distance_squared` for filtering points within a small radius in tight loops, handling longitude wrapping at the International Date Line.
**Learning:** For small distances (e.g., < 1km), the Haversine formula is computationally expensive due to multiple trigonometric calls. An equirectangular approximation (Euclidean distance on scaled lat/lon) is ~4x faster and sufficiently accurate for pre-filtering.
**Action:** Use `equirectangular_distance_squared` as a fast pre-filter to identify candidates within radius, then compute accurate Haversine distance only for those candidates. Always handle longitude wrapping at the International Date Line. Return Haversine distances to callers for accurate great-circle measurements.
29 changes: 16 additions & 13 deletions backend/spatial_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,23 @@
from backend.models import Issue


# Earth's mean radius in meters
# Note: We use the mean radius (6371000m) rather than WGS84 equatorial radius (6378137m)
# because it provides better accuracy across all latitudes, not just at the equator.
# This is the standard choice for general geographic distance calculations.
EARTH_RADIUS_METERS = 6371000.0


def get_bounding_box(lat: float, lon: float, radius_meters: float) -> Tuple[float, float, float, float]:
"""
Calculate the bounding box coordinates for a given radius.
Returns (min_lat, max_lat, min_lon, max_lon).
"""
# Earth's radius in meters
R = 6378137.0

# Coordinate offsets in radians
# Prevent division by zero at poles
effective_lat = max(min(lat, 89.9), -89.9)
dlat = radius_meters / R
dlon = radius_meters / (R * math.cos(math.pi * effective_lat / 180.0))
dlat = radius_meters / EARTH_RADIUS_METERS
dlon = radius_meters / (EARTH_RADIUS_METERS * math.cos(math.pi * effective_lat / 180.0))

# Offset positions in decimal degrees
lat_offset = dlat * 180.0 / math.pi
Expand All @@ -35,9 +39,6 @@ def get_bounding_box(lat: float, lon: float, radius_meters: float) -> Tuple[floa
return min_lat, max_lat, min_lon, max_lon


EARTH_RADIUS_METERS = 6371000.0


def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""
Calculate the great circle distance between two points
Expand Down Expand Up @@ -92,7 +93,8 @@ def find_nearby_issues(
) -> List[Tuple[Issue, float]]:
"""
Find issues within a specified radius of a target location.
Optimized to use equirectangular approximation for filtering.
Uses fast equirectangular approximation for pre-filtering candidates,
then computes accurate Haversine distance for final results.

Args:
issues: List of Issue objects to search through
Expand All @@ -101,7 +103,8 @@ def find_nearby_issues(
radius_meters: Search radius in meters (default 50m)

Returns:
List of tuples (issue, distance_meters) for issues within radius
List of tuples (issue, distance_meters) for issues within radius,
sorted by distance (closest first). Distance is great-circle (Haversine).
"""
nearby_issues = []

Expand All @@ -120,16 +123,16 @@ def find_nearby_issues(
lat_rad = issue.latitude * rad_factor
lon_rad = issue.longitude * rad_factor

# Use fast equirectangular approximation
# Fast pre-filter using squared equirectangular distance
dist_sq = equirectangular_distance_squared(
target_lat_rad, target_lon_rad,
lat_rad, lon_rad,
cos_lat
)

if dist_sq <= radius_sq:
# Calculate actual distance (sqrt of squared distance)
distance = math.sqrt(dist_sq)
# Calculate accurate great-circle distance for candidates that passed filter
distance = haversine_distance(target_lat, target_lon, issue.latitude, issue.longitude)
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai Bot Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Missing radius check after computing Haversine distance. The equirectangular pre-filter can underestimate distances, allowing issues outside the radius to be included. The accurate Haversine distance should be verified against radius_meters before appending.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/spatial_utils.py, line 135:

<comment>Missing radius check after computing Haversine distance. The equirectangular pre-filter can underestimate distances, allowing issues outside the radius to be included. The accurate Haversine distance should be verified against `radius_meters` before appending.</comment>

<file context>
@@ -120,16 +123,16 @@ def find_nearby_issues(
-            # Calculate actual distance (sqrt of squared distance)
-            distance = math.sqrt(dist_sq)
+            # Calculate accurate great-circle distance for candidates that passed filter
+            distance = haversine_distance(target_lat, target_lon, issue.latitude, issue.longitude)
             nearby_issues.append((issue, distance))
 
</file context>
Suggested change
distance = haversine_distance(target_lat, target_lon, issue.latitude, issue.longitude)
distance = haversine_distance(target_lat, target_lon, issue.latitude, issue.longitude)
if distance > radius_meters:
continue
Fix with Cubic

nearby_issues.append((issue, distance))

# Sort by distance (closest first)
Expand Down
41 changes: 41 additions & 0 deletions tests/test_spatial_deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,44 @@ def test_spatial_utils():

print("✓ Spatial utilities test passed")

def test_international_date_line_handling():
"""Test that longitude wrapping works correctly near the International Date Line"""
print("Testing International Date Line handling...")

# Test case 1: Points near +180/-180 boundary
# Point at 179.9°E and point at -179.9°W should be ~22km apart, not ~35978km
issues = [
Issue(id=1, latitude=0.0, longitude=179.9),
Issue(id=2, latitude=0.0, longitude=-179.9),
]

# Test from eastern side of IDL
nearby_east = find_nearby_issues(issues, 0.0, 179.9, radius_meters=30000)
print(f"Found {len(nearby_east)} issues within 30km from 179.9°E")
assert len(nearby_east) == 2, f"Expected 2 issues (both sides of IDL), got {len(nearby_east)}"

# Verify the cross-IDL distance is calculated correctly
cross_idl_distance = haversine_distance(0.0, 179.9, 0.0, -179.9)
print(f"Cross-IDL distance (179.9 to -179.9): {cross_idl_distance:.2f} meters")
assert cross_idl_distance < 25000, f"Cross-IDL distance should be ~22km, got {cross_idl_distance:.2f}m"

# Test case 2: High latitude near IDL
# At 60°N, longitude degrees are compressed (1° ≈ 55.6km)
issues_high_lat = [
Issue(id=3, latitude=60.0, longitude=179.5),
Issue(id=4, latitude=60.0, longitude=-179.5),
]

nearby_high_lat = find_nearby_issues(issues_high_lat, 60.0, 179.5, radius_meters=60000)
print(f"Found {len(nearby_high_lat)} issues at 60°N within 60km")
assert len(nearby_high_lat) == 2, f"Expected 2 issues at high latitude, got {len(nearby_high_lat)}"

high_lat_distance = haversine_distance(60.0, 179.5, 60.0, -179.5)
print(f"High latitude cross-IDL distance: {high_lat_distance:.2f} meters")
assert 50000 <= high_lat_distance <= 60000, f"High-lat cross-IDL distance should be ~55.6km, got {high_lat_distance:.2f}m"

Comment on lines +95 to +130
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new IDL test validates find_nearby_issues on an in-memory list, but the production endpoints first pre-filter in SQL using get_bounding_box + longitude >= min_lon AND longitude <= max_lon. Since get_bounding_box doesn’t currently handle International Date Line wrap, the API can still miss cross-IDL candidates even if find_nearby_issues is correct. Add a test that exercises the /api/issues/nearby endpoint (or a unit test for get_bounding_box’s wrap behavior) around ±180° to prevent regressions.

Copilot uses AI. Check for mistakes.
print("✓ International Date Line handling test passed")

def test_deduplication_api():
"""Test the deduplication API endpoints"""
print("Testing deduplication API...")
Expand Down Expand Up @@ -196,6 +234,9 @@ def test_verification_endpoint():
test_spatial_utils()
print()

test_international_date_line_handling()
print()

test_deduplication_api()
print()

Expand Down
205 changes: 205 additions & 0 deletions tests/test_spatial_utils_only.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
"""
Focused tests for spatial utility functions without API dependencies.
"""
import sys
import os

# Add backend to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from backend.spatial_utils import (
haversine_distance,
equirectangular_distance_squared,
find_nearby_issues,
EARTH_RADIUS_METERS
)
from backend.models import Issue
import math


def test_haversine_distance():
"""Test the Haversine distance calculation"""
print("Testing Haversine distance...")

# Test case 1: Short distance
distance = haversine_distance(19.0760, 72.8777, 19.0761, 72.8778)
print(f" Short distance: {distance:.2f} meters")
assert 10 <= distance <= 20, f"Expected ~11-15 meters, got {distance}"

# Test case 2: Cross-IDL distance at equator
cross_idl = haversine_distance(0.0, 179.9, 0.0, -179.9)
print(f" Cross-IDL distance (179.9 to -179.9): {cross_idl:.2f} meters")
assert cross_idl < 25000, f"Expected ~22km, got {cross_idl:.2f}m"

# Test case 3: High latitude cross-IDL (at 60°N, 1° longitude ≈ 55.6km)
high_lat = haversine_distance(60.0, 179.5, 60.0, -179.5)
print(f" High-lat cross-IDL distance (60°N): {high_lat:.2f} meters")
assert 50000 <= high_lat <= 60000, f"Expected ~55.6km, got {high_lat:.2f}m"

print("✓ Haversine distance tests passed")


def test_equirectangular_vs_haversine():
"""Test that equirectangular approximation is accurate for small distances"""
print("Testing equirectangular approximation accuracy...")

target_lat, target_lon = 19.0760, 72.8777
rad_factor = math.pi / 180.0
target_lat_rad = target_lat * rad_factor
target_lon_rad = target_lon * rad_factor
cos_lat = math.cos(target_lat_rad)

# Test at various small distances
test_points = [
(19.0761, 72.8778, "~15m"),
(19.0765, 72.8782, "~60m"),
(19.0770, 72.8787, "~140m"),
]

for lat2, lon2, desc in test_points:
haversine_dist = haversine_distance(target_lat, target_lon, lat2, lon2)

lat2_rad = lat2 * rad_factor
lon2_rad = lon2 * rad_factor
equirect_dist_sq = equirectangular_distance_squared(
target_lat_rad, target_lon_rad, lat2_rad, lon2_rad, cos_lat
)
equirect_dist = math.sqrt(equirect_dist_sq)

error_pct = abs(haversine_dist - equirect_dist) / haversine_dist * 100
print(f" {desc}: Haversine={haversine_dist:.2f}m, Equirect={equirect_dist:.2f}m, Error={error_pct:.3f}%")

# For small distances (<200m), error should be negligible (<0.1%)
if haversine_dist < 200:
assert error_pct < 0.1, f"Error too large for {desc}: {error_pct:.3f}%"

print("✓ Equirectangular approximation accuracy tests passed")


def test_international_date_line_handling():
"""Test that longitude wrapping works correctly near the International Date Line"""
print("Testing International Date Line handling...")

# Test case 1: Points near +180/-180 boundary at equator
issues = [
Issue(id=1, latitude=0.0, longitude=179.9),
Issue(id=2, latitude=0.0, longitude=-179.9),
]

# Test from eastern side of IDL
nearby_east = find_nearby_issues(issues, 0.0, 179.9, radius_meters=30000)
print(f" Found {len(nearby_east)} issues within 30km from 179.9°E")
assert len(nearby_east) == 2, f"Expected 2 issues (both sides of IDL), got {len(nearby_east)}"

# Verify distances
for issue, distance in nearby_east:
print(f" Issue {issue.id}: {distance:.2f}m")

# Test case 2: High latitude near IDL (at 60°N, longitude scale is compressed)
issues_high_lat = [
Issue(id=3, latitude=60.0, longitude=179.5),
Issue(id=4, latitude=60.0, longitude=-179.5),
]

nearby_high_lat = find_nearby_issues(issues_high_lat, 60.0, 179.5, radius_meters=60000)
print(f" Found {len(nearby_high_lat)} issues at 60°N within 60km")
assert len(nearby_high_lat) == 2, f"Expected 2 issues at high latitude, got {len(nearby_high_lat)}"

for issue, distance in nearby_high_lat:
print(f" Issue {issue.id}: {distance:.2f}m")
if issue.id == 3:
# Same location as target
assert distance < 100, f"Same location should be ~0m, got {distance:.2f}m"
elif issue.id == 4:
# Verify distance is reasonable (~55.6km across IDL)
assert 50000 <= distance <= 60000, f"Expected ~55.6km, got {distance:.2f}m"

# Test case 3: Verify IDL wrapping doesn't match distant points
issues_wrapped = [
Issue(id=5, latitude=0.0, longitude=179.0),
Issue(id=6, latitude=0.0, longitude=-179.0),
]

# With small radius, shouldn't match across IDL
Copy link
Copy Markdown
Contributor

@cubic-dev-ai cubic-dev-ai Bot Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Misleading comment contradicts test behavior. The comment says "shouldn't match across IDL" but the test uses a 250km radius and asserts that both issues ARE found (including across IDL). The variable name nearby_small is also misleading since 250km is not small.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At tests/test_spatial_utils_only.py, line 123:

<comment>Misleading comment contradicts test behavior. The comment says "shouldn't match across IDL" but the test uses a 250km radius and asserts that both issues ARE found (including across IDL). The variable name `nearby_small` is also misleading since 250km is not small.</comment>

<file context>
@@ -0,0 +1,205 @@
+        Issue(id=6, latitude=0.0, longitude=-179.0),
+    ]
+    
+    # With small radius, shouldn't match across IDL
+    nearby_small = find_nearby_issues(issues_wrapped, 0.0, 179.0, radius_meters=250000)
+    print(f"  Found {len(nearby_small)} issues within 250km from 179.0°E")
</file context>
Fix with Cubic

nearby_small = find_nearby_issues(issues_wrapped, 0.0, 179.0, radius_meters=250000)
print(f" Found {len(nearby_small)} issues within 250km from 179.0°E")
# Both should be found as they're ~222km apart
assert len(nearby_small) == 2, f"Expected 2 issues, got {len(nearby_small)}"
Comment on lines +123 to +127
Copy link

Copilot AI Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment for this case is contradictory: it says “With small radius, shouldn't match across IDL”, but the radius is 250km and the assertions expect the cross‑IDL point (~222km away) to be included. Please update the comment to reflect what’s being tested (e.g., that cross‑IDL matching occurs only when the radius is large enough).

Copilot uses AI. Check for mistakes.

for issue, distance in nearby_small:
print(f" Issue {issue.id}: {distance:.2f}m")
if issue.id == 5:
assert distance < 100, f"Same location should be ~0m, got {distance:.2f}m"
elif issue.id == 6:
# At equator, 2° ≈ 222km
assert 200000 <= distance <= 230000, f"Cross-IDL should be ~222km, got {distance:.2f}m"

print("✓ International Date Line handling tests passed")


def test_find_nearby_issues():
"""Test the nearby issues finding function"""
print("Testing find_nearby_issues...")

issues = [
Issue(id=1, latitude=19.0760, longitude=72.8777),
Issue(id=2, latitude=19.0761, longitude=72.8778),
Issue(id=3, latitude=19.0860, longitude=72.8877),
]

# Test with 50m radius
nearby = find_nearby_issues(issues, 19.0760, 72.8777, radius_meters=50)
print(f" Found {len(nearby)} nearby issues within 50m")
assert len(nearby) == 2, f"Expected 2 nearby issues, got {len(nearby)}"

# Verify sorting by distance
assert nearby[0][1] <= nearby[1][1], "Issues should be sorted by distance"
print(f" Distances: {[f'{d:.2f}m' for _, d in nearby]}")

# Test with larger radius
nearby_large = find_nearby_issues(issues, 19.0760, 72.8777, radius_meters=2000)
print(f" Found {len(nearby_large)} nearby issues within 2km")
assert len(nearby_large) == 3, f"Expected 3 nearby issues, got {len(nearby_large)}"

print("✓ find_nearby_issues tests passed")


def test_earth_radius_consistency():
"""Test that EARTH_RADIUS_METERS is used consistently"""
print("Testing Earth radius constant consistency...")

# Verify the constant is defined
assert EARTH_RADIUS_METERS == 6371000.0, f"Expected 6371000.0, got {EARTH_RADIUS_METERS}"

# Verify it's being used in haversine
# We can indirectly test by checking if distance calculations are reasonable
distance = haversine_distance(0.0, 0.0, 0.0, 1.0) # 1 degree longitude at equator
expected = EARTH_RADIUS_METERS * math.radians(1.0) # ~111km

# Should be close (within 1%)
error_pct = abs(distance - expected) / expected * 100
print(f" 1° longitude at equator: {distance:.2f}m (expected ~{expected:.2f}m, error {error_pct:.3f}%)")
assert error_pct < 1.0, f"Distance calculation seems incorrect, error: {error_pct:.3f}%"

print("✓ Earth radius consistency tests passed")


if __name__ == "__main__":
print("Running spatial utility tests...\n")

test_haversine_distance()
print()

test_equirectangular_vs_haversine()
print()

test_international_date_line_handling()
print()

test_find_nearby_issues()
print()

test_earth_radius_consistency()
print()

print("All tests passed! ✓")
Loading