Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Change log

### 0.3.3 - 2026-01-15
- Clarified unsupported-file messaging to show allowed patterns as `*.{edges, nodes, points, lines, zones, polygons}.geojson`.
- Structure-level validation errors now report the uploaded filename (ZIP) instead of the temp extraction directory; added coverage.
- Added tests for the additional-properties hint and refreshed the unit test overview counts.
- Fixed [Task-2817](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/2817)
- Fixed [Issue-2844](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/2844)

### 0.3.2 - 2025-12-15
- Reject non-OSW GeoJSON files (anything beyond points/nodes/edges/lines/polygons/zones) with a clear error; added unit coverage for unsupported files and extension validation paths.
- Clarified extension handling expectations: external extension ZIPs are now considered invalid inputs in tests.
Expand Down
13 changes: 10 additions & 3 deletions src/python_osw_validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@
from .zipfile_handler import ZipFileHandler
from .extracted_data_validator import ExtractedDataValidator, OSW_DATASET_FILES
from .version import __version__
from .helpers import _feature_index_from_error, _pretty_message, _rank_for
from .helpers import (
_add_additional_properties_hint,
_feature_index_from_error,
_pretty_message,
_rank_for,
)

SCHEMA_PATH = os.path.join(os.path.dirname(__file__), 'schema')
DEFAULT_DATASET_SCHEMAS = {
Expand Down Expand Up @@ -211,9 +216,10 @@ def validate(self, max_errors=20) -> ValidationResult:
# Validate the folder structure
validator = ExtractedDataValidator(self.extracted_dir)
if not validator.is_valid():
upload_name = os.path.basename(self.zipfile_path) if self.zipfile_path else self.extracted_dir
self.log_errors(
message=validator.error,
filename=self.extracted_dir,
filename=upload_name,
feature_index=None
)
return ValidationResult(False, self.errors, self.issues)
Expand Down Expand Up @@ -526,7 +532,8 @@ def validate_osw_errors(self, file_path: str, max_errors: int) -> bool:
for err in validator.iter_errors(geojson_data):
# legacy list (for backward compatibility)
if legacy_count < max_errors:
self.errors.append(f'Validation error: {getattr(err, "message", "")}')
raw_msg = _add_additional_properties_hint(getattr(err, "message", "") or "")
self.errors.append(f"Validation error: {raw_msg}")
legacy_count += 1
else:
# We've reached the legacy cap; stop work to match original performance
Expand Down
3 changes: 2 additions & 1 deletion src/python_osw_validation/extracted_data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ def is_valid(self) -> bool:
)
if unsupported_files:
allowed_fmt = ", ".join(allowed_keys)
allowed_names = f"*.{{{allowed_fmt}}}.geojson"
self.error = (f"Unsupported .geojson files present: {', '.join(unsupported_files)}. "
f"Allowed file types are {{{allowed_fmt}}}")
f"Allowed file names are {allowed_names}")
return False

required_files = [key for key, value in OSW_DATASET_FILES.items() if value['required']]
Expand Down
19 changes: 16 additions & 3 deletions src/python_osw_validation/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
from typing import Optional
import re

_ADDITIONAL_PROPERTIES_RE = re.compile(
r"Additional properties are not allowed \('(?P<tag>[^']+)' was unexpected\)"
)


def _add_additional_properties_hint(msg: str) -> str:
match = _ADDITIONAL_PROPERTIES_RE.search(msg)
if not match:
return msg
tag = match.group("tag")
return f"{msg}. If you want to carry this tag, change it to ext:{tag}"

def _feature_index_from_error(err) -> Optional[int]:
"""
Return the index after 'features' in the instance path, else None.
Expand Down Expand Up @@ -46,7 +58,7 @@ def _pretty_message(err, schema) -> str:
kind = _err_kind(err)

if kind == "Enum":
return _clean_enum_message(err)
return _add_additional_properties_hint(_clean_enum_message(err))

if kind == "AnyOf":
# Follow schema_path to the anyOf node; union of 'required' keys in branches.
Expand All @@ -73,12 +85,13 @@ def crawl(node):

if required:
props = ", ".join(sorted(required))
return f"must include one of: {props}"
return _add_additional_properties_hint(f"must include one of: {props}")
except Exception:
pass

# Default: first line from library message
return (getattr(err, "message", "") or "").split("\n")[0]
default_msg = (getattr(err, "message", "") or "").split("\n")[0]
return _add_additional_properties_hint(default_msg)


def _rank_for(err) -> tuple:
Expand Down
2 changes: 1 addition & 1 deletion src/python_osw_validation/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.3.2'
__version__ = '0.3.3'
2 changes: 1 addition & 1 deletion tests/unit_tests/test_extracted_data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_unsupported_files_are_rejected(self):
self.assertEqual(
validator.error,
'Unsupported .geojson files present: something_else.geojson. '
'Allowed file types are {edges, nodes, points, lines, zones, polygons}'
'Allowed file names are *.{edges, nodes, points, lines, zones, polygons}.geojson'
)


Expand Down
18 changes: 18 additions & 0 deletions tests/unit_tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@ def __init__(self, instance_path=None, kind=None, validator=None, message="", sc
self.schema_path = schema_path if schema_path is not None else []


# ----- tests for _add_additional_properties_hint ------------------------------
class TestAddAdditionalPropertiesHint(unittest.TestCase):
def test_appends_ext_hint_for_unexpected_tag(self):
msg = "Additional properties are not allowed ('foo' was unexpected)"
expected = "Additional properties are not allowed ('foo' was unexpected). If you want to carry this tag, change it to ext:foo"
self.assertEqual(helpers._add_additional_properties_hint(msg), expected)

def test_leaves_other_messages_unchanged(self):
msg = "value must be one of [a, b, c]"
self.assertEqual(helpers._add_additional_properties_hint(msg), msg)


# ----- tests for _feature_index_from_error ------------------------------------
class TestFeatureIndexFromError(unittest.TestCase):
def test_feature_index_present(self):
Expand Down Expand Up @@ -98,6 +110,12 @@ def test_default_first_line_from_message(self):
e = FakeErr(kind=None, validator=None, message="first line only\nsecond line ignored")
self.assertEqual(helpers._pretty_message(e, schema={}), "first line only")

def test_additional_properties_hint_is_applied(self):
msg = "Additional properties are not allowed ('bar' was unexpected)"
e = FakeErr(kind=None, validator=None, message=msg)
expected = "Additional properties are not allowed ('bar' was unexpected). If you want to carry this tag, change it to ext:bar"
self.assertEqual(helpers._pretty_message(e, schema={}), expected)


# ----- tests for _rank_for -----------------------------------------------------
class TestRankFor(unittest.TestCase):
Expand Down
18 changes: 18 additions & 0 deletions tests/unit_tests/test_osw_validation_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,24 @@ def _fake_validator(self, files, external_exts=None, valid=True, error="folder i

# ---------------- tests ----------------

def test_structure_error_uses_uploaded_filename(self):
"""Validator errors should reference the uploaded ZIP, not the temp extraction dir."""
with patch(_PATCH_ZIP) as PZip, patch(_PATCH_EV) as PVal:
z = MagicMock()
z.extract_zip.return_value = "/tmp/tmp123"
z.remove_extracted_files.return_value = None
PZip.return_value = z

PVal.return_value = self._fake_validator(files=[], valid=False, error="bad structure")

upload_path = "/uploads/user_dataset.zip"
res = OSWValidation(zipfile_path=upload_path).validate()

self.assertFalse(res.is_valid)
issue = res.issues[0]
self.assertEqual(issue["filename"], os.path.basename(upload_path))
self.assertIn("bad structure", issue["error_message"])

def test_missing_u_id_reports_error_without_keyerror(self):
"""Edges missing `_u_id` should report a friendly error instead of raising KeyError."""
fake_files = ["/tmp/nodes.geojson", "/tmp/edges.geojson"]
Expand Down
16 changes: 9 additions & 7 deletions unit_test_overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,21 @@

Below is a breakdown of all test scenarios (functions named `test_`) across the suite, plus a short description of the main areas they cover.

- **Total scenarios:** 116
- **Total scenarios:** 120
- **Unit test subset (tests/unit_tests) scenarios:** 106 (matches `coverage run --source=src/python_osw_validation -m unittest discover -v tests/unit_tests`)

| Test module | Scenarios | Focus |
| --- | ---: | --- |
| tests/test_schema_parity.py | 14 | Schema parity and customized-field handling |
| tests/unit_tests/test_extracted_data_validator.py | 11 | Folder/filename validation, duplicates/missing files, unsupported files |
| tests/unit_tests/test_helpers.py | 14 | Helper utilities (pretty messages, rankings, feature index extraction) |
| tests/unit_tests/test_helpers.py | 17 | Helper utilities (pretty messages, rankings, feature index extraction, additional-properties hint) |
| tests/unit_tests/test_osw_validation.py | 34 | End-to-end ZIP validation across assets (valid/invalid, schemas, serialization, unmatched IDs) |
| tests/unit_tests/test_osw_validation_extras.py | 34 | Edge cases: foreign-key checks, duplicate IDs, invalid geometries, extension file failures, JSON/OS errors, 0.2 disallowed content, unexpected exceptions |
| tests/unit_tests/test_osw_validation_extras.py | 35 | Edge cases: foreign-key checks, duplicate IDs, invalid geometries, extension file failures, JSON/OS errors, 0.2 disallowed content, unexpected exceptions, structure error messaging |
| tests/unit_tests/test_schema_definitions.py | 1 | Schema definition smoke check |
| tests/unit_tests/test_schema_metadata.py | 3 | Metadata/heuristic schema selection |
| tests/unit_tests/test_zipfile_handler.py | 5 | Zip extraction/creation lifecycle |

Method used: counted functions matching `def test_` within `tests/`.
Method used: counted functions matching `def test_` within `tests/` (full suite) and separately validated the `tests/unit_tests` subset via the coverage command above.

## Detail for heavy-hitter suites

Expand All @@ -28,7 +29,7 @@ Method used: counted functions matching `def test_` within `tests/`.
- Serialization error surfaced: `test_invalid_serialization_file`.
- Foreign-key cap: `test_unmatched_ids_limited_to_20`.

### tests/unit_tests/test_osw_validation_extras.py (34 scenarios)
### tests/unit_tests/test_osw_validation_extras.py (35 scenarios)
- Foreign-key checks: `test_missing_u_id_reports_error_without_keyerror`, `test_unmatched_u_id_is_limited_to_20`, `test_unmatched_w_id_is_limited_to_20`.
- JSON/IO parsing: `test_load_osw_file_reports_json_decode_error`, `test_load_osw_file_reports_os_error`, `test_validate_reports_json_decode_error`.
- GeoDataFrame read failures: `test_validate_reports_read_file_exception`.
Expand All @@ -39,6 +40,7 @@ Method used: counted functions matching `def test_` within `tests/`.
- Helpers and selection: `test_pick_schema_*`, `_get_colset*`, `test_load_osw_schema_reports_missing_file`.
- 0.2 disallowed content vs 0.3 allowed: `test_schema_02_rejects_tree_and_custom`, `test_schema_03_with_tree_tags_is_allowed`.
- Robustness: cleanup handling, zip extract failure, invalid folder structure, unexpected exception path (`test_cleanup_handles_locals_membership_error`, `test_zip_extract_failure_bubbles_as_error`, `test_extracted_data_validator_invalid`, `test_unexpected_exception_surfaces_unable_to_validate`, `test_issues_populated_for_invalid_zip`).
- Uploaded-name fidelity: structure errors report the uploaded filename instead of temp extraction dirs (`test_structure_error_uses_uploaded_filename`).

### tests/unit_tests/test_extracted_data_validator.py (11 scenarios)
- Valid layouts at root or nested, empty/invalid directories, and no-geojson detection.
Expand All @@ -51,9 +53,9 @@ Method used: counted functions matching `def test_` within `tests/`.
- 0.2 vs 0.3 parity: custom tags rejected on 0.2 where appropriate; accepted on 0.3.
- Extension fields allowed/blocked per schema version.

### tests/unit_tests/test_helpers.py (14 scenarios)
### tests/unit_tests/test_helpers.py (17 scenarios)
- Feature index extraction robustness across error shapes.
- Pretty-message compaction (enum, anyOf, ordering, trimming noise).
- Pretty-message compaction (enum, anyOf, ordering, trimming noise, additional-properties hint appended).
- Error ranking/tie-breaking and message selection helpers.

### tests/unit_tests/test_schema_definitions.py (1 scenario)
Expand Down