diff --git a/CHANGELOG.md b/CHANGELOG.md index 6325fdc..b8ffe38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Change log +### 0.3.3 - 2026-01-15 +- Clarified unsupported-file messaging to show allowed patterns as `*.{edges, nodes, points, lines, zones, polygons}.geojson`. +- Structure-level validation errors now report the uploaded filename (ZIP) instead of the temp extraction directory; added coverage. +- Added tests for the additional-properties hint and refreshed the unit test overview counts. +- Fixed [Task-2817](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/2817) +- Fixed [Issue-2844](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/2844) + ### 0.3.2 - 2025-12-15 - Reject non-OSW GeoJSON files (anything beyond points/nodes/edges/lines/polygons/zones) with a clear error; added unit coverage for unsupported files and extension validation paths. - Clarified extension handling expectations: external extension ZIPs are now considered invalid inputs in tests. diff --git a/src/python_osw_validation/__init__.py b/src/python_osw_validation/__init__.py index 22a0721..538d56d 100644 --- a/src/python_osw_validation/__init__.py +++ b/src/python_osw_validation/__init__.py @@ -9,7 +9,12 @@ from .zipfile_handler import ZipFileHandler from .extracted_data_validator import ExtractedDataValidator, OSW_DATASET_FILES from .version import __version__ -from .helpers import _feature_index_from_error, _pretty_message, _rank_for +from .helpers import ( + _add_additional_properties_hint, + _feature_index_from_error, + _pretty_message, + _rank_for, +) SCHEMA_PATH = os.path.join(os.path.dirname(__file__), 'schema') DEFAULT_DATASET_SCHEMAS = { @@ -211,9 +216,10 @@ def validate(self, max_errors=20) -> ValidationResult: # Validate the folder structure validator = ExtractedDataValidator(self.extracted_dir) if not validator.is_valid(): + upload_name = os.path.basename(self.zipfile_path) if self.zipfile_path else self.extracted_dir self.log_errors( message=validator.error, - filename=self.extracted_dir, + filename=upload_name, feature_index=None ) return ValidationResult(False, self.errors, self.issues) @@ -526,7 +532,8 @@ def validate_osw_errors(self, file_path: str, max_errors: int) -> bool: for err in validator.iter_errors(geojson_data): # legacy list (for backward compatibility) if legacy_count < max_errors: - self.errors.append(f'Validation error: {getattr(err, "message", "")}') + raw_msg = _add_additional_properties_hint(getattr(err, "message", "") or "") + self.errors.append(f"Validation error: {raw_msg}") legacy_count += 1 else: # We've reached the legacy cap; stop work to match original performance diff --git a/src/python_osw_validation/extracted_data_validator.py b/src/python_osw_validation/extracted_data_validator.py index 52abdf1..8e4ac62 100644 --- a/src/python_osw_validation/extracted_data_validator.py +++ b/src/python_osw_validation/extracted_data_validator.py @@ -104,8 +104,9 @@ def is_valid(self) -> bool: ) if unsupported_files: allowed_fmt = ", ".join(allowed_keys) + allowed_names = f"*.{{{allowed_fmt}}}.geojson" self.error = (f"Unsupported .geojson files present: {', '.join(unsupported_files)}. " - f"Allowed file types are {{{allowed_fmt}}}") + f"Allowed file names are {allowed_names}") return False required_files = [key for key, value in OSW_DATASET_FILES.items() if value['required']] diff --git a/src/python_osw_validation/helpers.py b/src/python_osw_validation/helpers.py index 91e0808..68cc0f3 100644 --- a/src/python_osw_validation/helpers.py +++ b/src/python_osw_validation/helpers.py @@ -1,6 +1,18 @@ from typing import Optional import re +_ADDITIONAL_PROPERTIES_RE = re.compile( + r"Additional properties are not allowed \('(?P[^']+)' was unexpected\)" +) + + +def _add_additional_properties_hint(msg: str) -> str: + match = _ADDITIONAL_PROPERTIES_RE.search(msg) + if not match: + return msg + tag = match.group("tag") + return f"{msg}. If you want to carry this tag, change it to ext:{tag}" + def _feature_index_from_error(err) -> Optional[int]: """ Return the index after 'features' in the instance path, else None. @@ -46,7 +58,7 @@ def _pretty_message(err, schema) -> str: kind = _err_kind(err) if kind == "Enum": - return _clean_enum_message(err) + return _add_additional_properties_hint(_clean_enum_message(err)) if kind == "AnyOf": # Follow schema_path to the anyOf node; union of 'required' keys in branches. @@ -73,12 +85,13 @@ def crawl(node): if required: props = ", ".join(sorted(required)) - return f"must include one of: {props}" + return _add_additional_properties_hint(f"must include one of: {props}") except Exception: pass # Default: first line from library message - return (getattr(err, "message", "") or "").split("\n")[0] + default_msg = (getattr(err, "message", "") or "").split("\n")[0] + return _add_additional_properties_hint(default_msg) def _rank_for(err) -> tuple: diff --git a/src/python_osw_validation/version.py b/src/python_osw_validation/version.py index 73e3bb4..80eb7f9 100644 --- a/src/python_osw_validation/version.py +++ b/src/python_osw_validation/version.py @@ -1 +1 @@ -__version__ = '0.3.2' +__version__ = '0.3.3' diff --git a/tests/unit_tests/test_extracted_data_validator.py b/tests/unit_tests/test_extracted_data_validator.py index f6ee874..35444cf 100644 --- a/tests/unit_tests/test_extracted_data_validator.py +++ b/tests/unit_tests/test_extracted_data_validator.py @@ -106,7 +106,7 @@ def test_unsupported_files_are_rejected(self): self.assertEqual( validator.error, 'Unsupported .geojson files present: something_else.geojson. ' - 'Allowed file types are {edges, nodes, points, lines, zones, polygons}' + 'Allowed file names are *.{edges, nodes, points, lines, zones, polygons}.geojson' ) diff --git a/tests/unit_tests/test_helpers.py b/tests/unit_tests/test_helpers.py index 4a4e316..63c010f 100644 --- a/tests/unit_tests/test_helpers.py +++ b/tests/unit_tests/test_helpers.py @@ -12,6 +12,18 @@ def __init__(self, instance_path=None, kind=None, validator=None, message="", sc self.schema_path = schema_path if schema_path is not None else [] +# ----- tests for _add_additional_properties_hint ------------------------------ +class TestAddAdditionalPropertiesHint(unittest.TestCase): + def test_appends_ext_hint_for_unexpected_tag(self): + msg = "Additional properties are not allowed ('foo' was unexpected)" + expected = "Additional properties are not allowed ('foo' was unexpected). If you want to carry this tag, change it to ext:foo" + self.assertEqual(helpers._add_additional_properties_hint(msg), expected) + + def test_leaves_other_messages_unchanged(self): + msg = "value must be one of [a, b, c]" + self.assertEqual(helpers._add_additional_properties_hint(msg), msg) + + # ----- tests for _feature_index_from_error ------------------------------------ class TestFeatureIndexFromError(unittest.TestCase): def test_feature_index_present(self): @@ -98,6 +110,12 @@ def test_default_first_line_from_message(self): e = FakeErr(kind=None, validator=None, message="first line only\nsecond line ignored") self.assertEqual(helpers._pretty_message(e, schema={}), "first line only") + def test_additional_properties_hint_is_applied(self): + msg = "Additional properties are not allowed ('bar' was unexpected)" + e = FakeErr(kind=None, validator=None, message=msg) + expected = "Additional properties are not allowed ('bar' was unexpected). If you want to carry this tag, change it to ext:bar" + self.assertEqual(helpers._pretty_message(e, schema={}), expected) + # ----- tests for _rank_for ----------------------------------------------------- class TestRankFor(unittest.TestCase): diff --git a/tests/unit_tests/test_osw_validation_extras.py b/tests/unit_tests/test_osw_validation_extras.py index 3f7ef52..6a03289 100644 --- a/tests/unit_tests/test_osw_validation_extras.py +++ b/tests/unit_tests/test_osw_validation_extras.py @@ -73,6 +73,24 @@ def _fake_validator(self, files, external_exts=None, valid=True, error="folder i # ---------------- tests ---------------- + def test_structure_error_uses_uploaded_filename(self): + """Validator errors should reference the uploaded ZIP, not the temp extraction dir.""" + with patch(_PATCH_ZIP) as PZip, patch(_PATCH_EV) as PVal: + z = MagicMock() + z.extract_zip.return_value = "/tmp/tmp123" + z.remove_extracted_files.return_value = None + PZip.return_value = z + + PVal.return_value = self._fake_validator(files=[], valid=False, error="bad structure") + + upload_path = "/uploads/user_dataset.zip" + res = OSWValidation(zipfile_path=upload_path).validate() + + self.assertFalse(res.is_valid) + issue = res.issues[0] + self.assertEqual(issue["filename"], os.path.basename(upload_path)) + self.assertIn("bad structure", issue["error_message"]) + def test_missing_u_id_reports_error_without_keyerror(self): """Edges missing `_u_id` should report a friendly error instead of raising KeyError.""" fake_files = ["/tmp/nodes.geojson", "/tmp/edges.geojson"] diff --git a/unit_test_overview.md b/unit_test_overview.md index 36db5bb..7405c52 100644 --- a/unit_test_overview.md +++ b/unit_test_overview.md @@ -2,20 +2,21 @@ Below is a breakdown of all test scenarios (functions named `test_`) across the suite, plus a short description of the main areas they cover. -- **Total scenarios:** 116 +- **Total scenarios:** 120 +- **Unit test subset (tests/unit_tests) scenarios:** 106 (matches `coverage run --source=src/python_osw_validation -m unittest discover -v tests/unit_tests`) | Test module | Scenarios | Focus | | --- | ---: | --- | | tests/test_schema_parity.py | 14 | Schema parity and customized-field handling | | tests/unit_tests/test_extracted_data_validator.py | 11 | Folder/filename validation, duplicates/missing files, unsupported files | -| tests/unit_tests/test_helpers.py | 14 | Helper utilities (pretty messages, rankings, feature index extraction) | +| tests/unit_tests/test_helpers.py | 17 | Helper utilities (pretty messages, rankings, feature index extraction, additional-properties hint) | | tests/unit_tests/test_osw_validation.py | 34 | End-to-end ZIP validation across assets (valid/invalid, schemas, serialization, unmatched IDs) | -| tests/unit_tests/test_osw_validation_extras.py | 34 | Edge cases: foreign-key checks, duplicate IDs, invalid geometries, extension file failures, JSON/OS errors, 0.2 disallowed content, unexpected exceptions | +| tests/unit_tests/test_osw_validation_extras.py | 35 | Edge cases: foreign-key checks, duplicate IDs, invalid geometries, extension file failures, JSON/OS errors, 0.2 disallowed content, unexpected exceptions, structure error messaging | | tests/unit_tests/test_schema_definitions.py | 1 | Schema definition smoke check | | tests/unit_tests/test_schema_metadata.py | 3 | Metadata/heuristic schema selection | | tests/unit_tests/test_zipfile_handler.py | 5 | Zip extraction/creation lifecycle | -Method used: counted functions matching `def test_` within `tests/`. +Method used: counted functions matching `def test_` within `tests/` (full suite) and separately validated the `tests/unit_tests` subset via the coverage command above. ## Detail for heavy-hitter suites @@ -28,7 +29,7 @@ Method used: counted functions matching `def test_` within `tests/`. - Serialization error surfaced: `test_invalid_serialization_file`. - Foreign-key cap: `test_unmatched_ids_limited_to_20`. -### tests/unit_tests/test_osw_validation_extras.py (34 scenarios) +### tests/unit_tests/test_osw_validation_extras.py (35 scenarios) - Foreign-key checks: `test_missing_u_id_reports_error_without_keyerror`, `test_unmatched_u_id_is_limited_to_20`, `test_unmatched_w_id_is_limited_to_20`. - JSON/IO parsing: `test_load_osw_file_reports_json_decode_error`, `test_load_osw_file_reports_os_error`, `test_validate_reports_json_decode_error`. - GeoDataFrame read failures: `test_validate_reports_read_file_exception`. @@ -39,6 +40,7 @@ Method used: counted functions matching `def test_` within `tests/`. - Helpers and selection: `test_pick_schema_*`, `_get_colset*`, `test_load_osw_schema_reports_missing_file`. - 0.2 disallowed content vs 0.3 allowed: `test_schema_02_rejects_tree_and_custom`, `test_schema_03_with_tree_tags_is_allowed`. - Robustness: cleanup handling, zip extract failure, invalid folder structure, unexpected exception path (`test_cleanup_handles_locals_membership_error`, `test_zip_extract_failure_bubbles_as_error`, `test_extracted_data_validator_invalid`, `test_unexpected_exception_surfaces_unable_to_validate`, `test_issues_populated_for_invalid_zip`). +- Uploaded-name fidelity: structure errors report the uploaded filename instead of temp extraction dirs (`test_structure_error_uses_uploaded_filename`). ### tests/unit_tests/test_extracted_data_validator.py (11 scenarios) - Valid layouts at root or nested, empty/invalid directories, and no-geojson detection. @@ -51,9 +53,9 @@ Method used: counted functions matching `def test_` within `tests/`. - 0.2 vs 0.3 parity: custom tags rejected on 0.2 where appropriate; accepted on 0.3. - Extension fields allowed/blocked per schema version. -### tests/unit_tests/test_helpers.py (14 scenarios) +### tests/unit_tests/test_helpers.py (17 scenarios) - Feature index extraction robustness across error shapes. -- Pretty-message compaction (enum, anyOf, ordering, trimming noise). +- Pretty-message compaction (enum, anyOf, ordering, trimming noise, additional-properties hint appended). - Error ranking/tie-breaking and message selection helpers. ### tests/unit_tests/test_schema_definitions.py (1 scenario)