From b505eee5567be1e75e65d84586d1172a96bd420c Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 20:34:49 +0000 Subject: [PATCH 1/8] Propose plan for testing refinement --- plans/002-strategic-testing-refinement.md | 119 ++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 plans/002-strategic-testing-refinement.md diff --git a/plans/002-strategic-testing-refinement.md b/plans/002-strategic-testing-refinement.md new file mode 100644 index 0000000..c2657f6 --- /dev/null +++ b/plans/002-strategic-testing-refinement.md @@ -0,0 +1,119 @@ +# Strategic testing refinement plan (pre-1.0) + +## Goals +- Reduce test suite size and maintenance cost while preserving confidence in the 9-step pipeline. +- Keep one full E2E path per language (English + French) as smoke coverage. +- Trim duplicate or low-signal tests by tracing each test to a specific contract or behavior. +- Preserve step boundary contracts and critical failure modes. + +## Constraints and standards +- Follow markers and layering rules in [pytest.ini](pytest.ini) and [docs/TESTING_STANDARDS.md](docs/TESTING_STANDARDS.md). +- Keep tests aligned with step isolation and deterministic IO per [AGENTS.MD](AGENTS.MD). +- Avoid introducing new standalone docs; this plan is the canonical working doc for this initiative. + +## Inventory and mapping workflow +### 1) Build the test inventory +- Enumerate all tests under [tests](tests) and tag each file with: + - Category: unit / integration / e2e + - Primary pipeline step(s): 1–9 + - Primary module(s): [pipeline](pipeline), [templates](templates) + - Cost profile: fast / moderate / slow / flaky + - Main contract(s): e.g., schema validation, QR payload formatting, Typst compile, PDF validation + +### 2) Map tests to source contracts +- For each test file, link to the code it covers and the contract it asserts. +- Use step structure (Steps 1–9) and cross-cutting modules: + - Orchestrator entry point: [pipeline/orchestrator.py](pipeline/orchestrator.py) + - Steps: [pipeline](pipeline) modules (preprocess, generate_qr_codes, generate_notices, compile_notices, validate_pdfs, encrypt_notice, bundle_pdfs, cleanup) + - Config, data models, enums, translation helpers, templates +- Produce a trace table: `test file → contract → source link`. + +### 3) Classify each test +Use the rubric below to tag each test file for keep/trim/merge/replace. + +**Keep** +- Unique contract coverage with high impact (schema validation, deterministic IO, config parsing errors, step boundary validation). +- Only path covering a failure mode that is expensive to discover later. + +**Trim/Merge** +- Repeats the same contract in multiple files. +- Verifies formatting details already asserted in helper tests. +- Duplicates coverage via both integration and unit tests for identical logic. + +**Replace** +- Slow tests that can be replaced by a targeted unit contract test. +- Tests that depend on full Typst/PDF pipeline when a mocked or stubbed smoke check suffices. + +## Subagent analysis plan +Run subagents with explicit scope, producing structured findings for each test file and code path. Use four subagents total. + +### Subagent A — Unit tests audit +- Scope: [tests/unit](tests/unit) +- Output: + - List of unit files with covered contracts and source links. + - Candidate duplicates or low-signal tests. + - Suggested merges or removals. + +### Subagent B — Integration tests audit +- Scope: [tests/integration](tests/integration) +- Output: + - Contract tests that represent step boundaries. + - Overlap with unit tests or E2E tests. + - Candidates for replacement with lighter unit tests. + +### Subagent C — E2E tests audit +- Scope: [tests/e2e](tests/e2e) +- Output: + - Keep only one full end-to-end path per language (English + French). + - Identify expensive fixtures and brittle outputs. + - Propose a minimal “smoke” variant for each language if full compile is too costly. + +### Subagent D — Coverage and trace analysis +- Scope: coverage reports and step/module tracing +- Output: + - Coverage hotspots: high coverage redundancy. + - Coverage gaps in critical modules (e.g., config loader, validation boundaries). + - Recommendations for coverage-based trimming. + +## Coverage and trace workflow +1. Run targeted coverage for each category: + - Unit: `uv run pytest -m unit --cov=pipeline --cov-report=term-missing` + - Integration: `uv run pytest -m integration --cov=pipeline --cov-report=term-missing` + - E2E: `uv run pytest -m e2e --cov=pipeline --cov-report=term-missing` +2. Generate HTML coverage once for a full view: + - `uv run pytest --cov=pipeline --cov-report=html` + - Review [htmlcov/index.html](htmlcov/index.html) +3. For each test file, note which source lines are exclusively covered by that test. Keep tests that uniquely cover important contracts. + +## Decision criteria matrix +Use this matrix for each test file: + +| Test file | Category | Contract(s) | Source link(s) | Cost | Duplicates? | Keep/Trim/Replace | Rationale | +|---|---|---|---|---|---|---|---| + +### Contract priority (high → low) +1. Orchestrator step ordering & CLI validation. +2. Step boundary validation (inputs/outputs on disk). +3. Schema validation and normalization. +4. QR payload formatting and URL encoding. +5. Template rendering correctness (language-specific). +6. PDF compilation & validation (smoke only in E2E). +7. Encryption and bundling (smoke only in E2E). +8. Cleanup and output preparation. + +## Output actions +- Maintain one E2E test per language (English, French) as smoke tests. +- Reduce integration tests to a minimal set of step-boundary contract checks. +- Trim unit tests that validate formatting already covered by higher-level contracts. +- Consolidate duplicated fixtures across [tests/conftest.py](tests/conftest.py) and [tests/fixtures](tests/fixtures). + +## Deliverables +1. Completed test inventory table with links to source contracts. +2. Marked keep/trim/replace list per file. +3. Coverage summary (unit/integration/e2e). +4. Final trimmed test plan with the minimal E2E (EN + FR) and reduced integration suite. + +## Notes and decisions +- Keep one E2E pipeline run per language as requested (English + French). +- Prioritize deterministic, step-isolated tests aligned with pre-1.0 simplification. +- Any removed tests must have their contract covered elsewhere or deemed low-risk. From 076782a20c54e9f3af804ae65a361a6f6945bb11 Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 20:59:36 +0000 Subject: [PATCH 2/8] Audit complete --- plans/002-strategic-testing-refinement.md | 40 ++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/plans/002-strategic-testing-refinement.md b/plans/002-strategic-testing-refinement.md index c2657f6..17724a9 100644 --- a/plans/002-strategic-testing-refinement.md +++ b/plans/002-strategic-testing-refinement.md @@ -86,10 +86,22 @@ Run subagents with explicit scope, producing structured findings for each test f 3. For each test file, note which source lines are exclusively covered by that test. Keep tests that uniquely cover important contracts. ## Decision criteria matrix -Use this matrix for each test file: +Here is the assessment of the current test suite: | Test file | Category | Contract(s) | Source link(s) | Cost | Duplicates? | Keep/Trim/Replace | Rationale | |---|---|---|---|---|---|---|---| +| `tests/unit/test_config_loader.py` | Unit | Config loading/env vars | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | Yes | Merge | Merge into `test_config_validation.py` for unified config lifecycle. | +| `tests/unit/test_config_validation.py` | Unit | Schema/business rules | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | Partial | Keep | Centralized validation is critical for "contract over defensiveness". | +| `tests/unit/test_enums.py` | Unit | Enum integrity | [pipeline/enums.py](pipeline/enums.py) | Fast | Yes | Remove | Redundant with static analysis (Ruff/Mypy). | +| `tests/unit/test_data_models.py` | Unit | Dataclass integrity | [pipeline/data_models.py](pipeline/data_models.py) | Fast | Yes | Trim | Remove generic frozen/field checks; keep custom methods. | +| `tests/unit/test_preprocess.py` | Unit | Normalization/Sorting | [pipeline/preprocess.py](pipeline/preprocess.py) | Mod | No | Keep | Core business logic with high complexity. | +| `tests/unit/test_dynamic_template_loading.py` | Unit | Template discovery | [pipeline/generate_notices.py](pipeline/generate_notices.py) | Fast | Yes | Merge | Merge into `test_generate_notices.py`. | +| `tests/unit/test_unsupported_language_failure_paths.py` | Unit | Lang validation | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Fast | Yes | Merge | Merge into `test_orchestrator.py`. | +| `tests/integration/test_artifact_schema.py` | Integration | Step IO schema | [pipeline/data_models.py](pipeline/data_models.py) | Fast | Yes | Merge | Consolidation into `test_pipeline_contracts.py`. | +| `tests/integration/test_artifact_schema_flow.py` | Integration | Data flow | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Mod | Yes | Merge | Consolidation into `test_pipeline_contracts.py`. | +| `tests/integration/test_config_driven_behavior.py` | Integration | Config toggle logic | [pipeline/config_loader.py](pipeline/config_loader.py) | Mod | Yes | Remove | Overlap with `test_config_validation.py` unit tests. | +| `tests/integration/test_pipeline_stages.py` | Integration | Handoff contracts | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Slow | No | Keep | Vital for verifying that steps talk to each other correctly. | +| `tests/e2e/test_full_pipeline.py` | E2E | Full pipeline (EN/FR) | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Slow | No | Keep | Essential smoke tests for both languages. | ### Contract priority (high → low) 1. Orchestrator step ordering & CLI validation. @@ -113,6 +125,32 @@ Use this matrix for each test file: 3. Coverage summary (unit/integration/e2e). 4. Final trimmed test plan with the minimal E2E (EN + FR) and reduced integration suite. +## Audit Findings + +### Subagent B — Integration tests audit (Findings) +The audit of [tests/integration](tests/integration) is complete. + +#### 1. Covered Contracts & Modules +Integration tests cover the following multi-step handoffs: +- **Preprocess -> QR Gen -> Notice Gen:** Covered by [tests/integration/test_pipeline_stages.py](tests/integration/test_pipeline_stages.py) and [tests/integration/test_artifact_schema_flow.py](tests/integration/test_artifact_schema_flow.py). Verifies field availability in the intermediate JSON artifact used across steps 2, 3, and 4. +- **Notice Gen -> Compilation:** Covered by [tests/integration/test_custom_templates.py](tests/integration/test_custom_templates.py). Verifies dynamic module loading and asset resolution during steps 4 and 5. +- **Compilation -> Validation -> Encryption -> Bundling:** Covered by [tests/integration/test_pipeline_stages.py](tests/integration/test_pipeline_stages.py). Verifies metadata preservation and file presence for steps 6, 7, and 8. +- **Fail-Fast vs Recovery Philosophy:** Covered by [tests/integration/test_error_propagation.py](tests/integration/test_error_propagation.py). Verifies orchestrator contracts for critical vs optional steps. +- **Translation-Normalization Chain:** Covered by [tests/integration/test_translation_integration.py](tests/integration/test_translation_integration.py). Verifies the flow from raw data to translated template context. + +#### 2. Overlap & Redundancy +- **Brittle Data Assertions:** [tests/integration/test_artifact_schema.py](tests/integration/test_artifact_schema.py) and [tests/integration/test_artifact_schema_flow.py](tests/integration/test_artifact_schema_flow.py) are largely redundant with each other and with unit tests for [pipeline/data_models.py](pipeline/data_models.py). +- **Low-Value Config Checks:** [tests/integration/test_config_driven_behavior.py](tests/integration/test_config_driven_behavior.py) asserts key presence in dictionaries, which is a unit-level concern for [pipeline/config_loader.py](pipeline/config_loader.py) and provides little integration signal. +- **Pure Unit Logic:** [tests/integration/test_translation_integration.py](tests/integration/test_translation_integration.py) contains several tests for disease normalization that should be pure unit tests for [pipeline/translation_helpers.py](pipeline/translation_helpers.py). + +#### 3. Recommended Minimal Set +To maintain confidence while reducing maintenance, the following changes are proposed: +- **Keep** [tests/integration/test_error_propagation.py](tests/integration/test_error_propagation.py) (Philosophy/Orchestration contract). +- **Keep** [tests/integration/test_custom_templates.py](tests/integration/test_custom_templates.py) (Dynamic loading/PHU customization contract). +- **Consolidate** [tests/integration/test_pipeline_stages.py](tests/integration/test_pipeline_stages.py), [tests/integration/test_artifact_schema.py](tests/integration/test_artifact_schema.py), and [tests/integration/test_artifact_schema_flow.py](tests/integration/test_artifact_schema_flow.py) into a single `test_pipeline_contracts.py` that focuses on disk-based handoffs. +- **Simplify** [tests/integration/test_translation_integration.py](tests/integration/test_translation_integration.py) to focus on the `preprocess` -> `context` boundary, moving pure string logic to unit tests. +- **Delete** [tests/integration/test_config_driven_behavior.py](tests/integration/test_config_driven_behavior.py). + ## Notes and decisions - Keep one E2E pipeline run per language as requested (English + French). - Prioritize deterministic, step-isolated tests aligned with pre-1.0 simplification. From 184e5dc48d2251ab66184671818f0b769080b2e5 Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 21:32:53 +0000 Subject: [PATCH 3/8] First pass --- docs/TESTING_STANDARDS.md | 166 ++---- plans/002-strategic-testing-refinement.md | 15 +- tests/e2e/test_full_pipeline.py | 297 ++-------- tests/integration/test_artifact_schema.py | 141 ----- .../integration/test_artifact_schema_flow.py | 357 ------------ .../test_config_driven_behavior.py | 330 ----------- tests/integration/test_pipeline_contracts.py | 281 ++++++++++ tests/integration/test_pipeline_stages.py | 519 ------------------ .../test_translation_integration.py | 50 +- tests/unit/test_config_loader.py | 179 ------ tests/unit/test_config_validation.py | 174 +++++- tests/unit/test_data_models.py | 214 +------- tests/unit/test_dynamic_template_loading.py | 259 --------- tests/unit/test_enums.py | 323 ----------- tests/unit/test_generate_notices.py | 246 ++++++++- tests/unit/test_preprocess.py | 25 +- tests/unit/test_run_pipeline.py | 288 +++++++++- tests/unit/test_translation_helpers.py | 32 ++ ...test_unsupported_language_failure_paths.py | 269 --------- 19 files changed, 1140 insertions(+), 3025 deletions(-) delete mode 100644 tests/integration/test_artifact_schema.py delete mode 100644 tests/integration/test_artifact_schema_flow.py delete mode 100644 tests/integration/test_config_driven_behavior.py create mode 100644 tests/integration/test_pipeline_contracts.py delete mode 100644 tests/integration/test_pipeline_stages.py delete mode 100644 tests/unit/test_config_loader.py delete mode 100644 tests/unit/test_dynamic_template_loading.py delete mode 100644 tests/unit/test_enums.py delete mode 100644 tests/unit/test_unsupported_language_failure_paths.py diff --git a/docs/TESTING_STANDARDS.md b/docs/TESTING_STANDARDS.md index d0ab49a..39cc738 100644 --- a/docs/TESTING_STANDARDS.md +++ b/docs/TESTING_STANDARDS.md @@ -6,6 +6,15 @@ This document defines the testing strategy and organizational standards for the Tests are organized in three layers to provide different types of validation at different speeds. +## Strategic Principles (Pre-1.0 Refinement) + +As part of the pre-1.0 release strategy, the following principles guide the test suite: + +1. **Contracts over Defensiveness:** Focus on validating the boundaries between steps (inputs/outputs on disk) rather than repetitive internal logic checks in every module. +2. **Minimal E2E Smoke Tests:** Preserve exactly one full pipeline path per language (English and French) to ensure end-to-end viability without overwhelming the suite with redundant E2E variants. +3. **Consolidated Units:** Merge tests for tightly coupled modules (e.g., config loading and validation) to reduce file overhead and improve maintainability. +4. **Deterministic and Isolated:** Tests must not rely on shared global state or external services. All PDF compilation must satisfy project-root path constraints. + ## Frameworks and Metrics Used `pytest` is the framework used to write and run tests for the codebase. As a metric to determine the percentage of source code that is executed during testing, code coverage is used. `pytest-cov` is used to determine whether there are areas in the codebase that are not executed during testing, which may contribute to bugs. Code coverage is integrated in our GitHub actions when a pull request is made to ensure that new additions to the main code base are tested. @@ -16,33 +25,29 @@ Tests are organized in three layers to provide different types of validation at ``` tests/ -├── unit/ # Unit tests (one per module) -│ ├── test_config_loader.py +├── unit/ # Unit tests (one per module/contract) +│ ├── test_config_validation.py # Merged loader + validation │ ├── test_preprocess.py -│ ├── test_generate_notices.py +│ ├── test_generate_notices.py # Merged template loading │ ├── test_generate_qr_codes.py │ ├── test_compile_notices.py -│ ├── test_count_pdfs.py +│ ├── test_validate_pdfs.py │ ├── test_encrypt_notice.py -│ ├── test_batch_pdfs.py +│ ├── test_bundle_pdfs.py │ ├── test_cleanup.py │ ├── test_prepare_output.py -│ ├── test_enums.py -│ ├── test_data_models.py +│ ├── test_data_models.py # Trimmed to custom logic │ ├── test_utils.py -│ └── test_run_pipeline.py +│ └── test_run_pipeline.py # Merged lang failure paths │ ├── integration/ # Integration tests (step interactions) -│ ├── test_pipeline_preprocess_to_qr.py -│ ├── test_pipeline_notices_to_compile.py -│ ├── test_pipeline_pdf_validation.py -│ ├── test_artifact_schema.py -│ └── test_config_driven_behavior.py +│ ├── test_pipeline_contracts.py # Merged artifact/schema/handoffs +│ ├── test_custom_templates.py +│ ├── test_error_propagation.py +│ └── test_translation_integration.py │ ├── e2e/ # End-to-end tests (full pipeline) -│ ├── test_full_pipeline_en.py -│ ├── test_full_pipeline_fr.py -│ └── test_pipeline_edge_cases.py +│ └── test_full_pipeline.py # EN and FR paths (smoke tests) │ ├── fixtures/ # Shared test utilities │ ├── conftest.py # Pytest fixtures @@ -166,106 +171,45 @@ def project_root() -> Path: return Path(__file__).parent.parent.parent # tests/e2e/... → project root @pytest.mark.e2e -def test_full_pipeline_english(project_root: Path) -> None: - """E2E: Complete pipeline generates PDF output for English input. - - Real-world significance: - - Verifies full 9-step pipeline works end-to-end - - Ensures PDF files are created with correct names and counts - - Tests English language variant (French tested separately) - - Parameters - ---------- - project_root : Path - Fixture providing absolute path to project root +class TestFullPipelineExecution: + """E2E: Complete pipeline generates PDF output for multiple languages.""" + + @pytest.fixture + def project_root(self) -> Path: + """Fixture providing absolute path to project root.""" + return Path(__file__).resolve().parent.parent.parent + + @pytest.fixture + def e2e_workdir(self, project_root: Path, tmp_path_factory: pytest.TempPathFactory) -> Path: + """Create a temporary workdir within project root for path resolution.""" + workdir = project_root / f"tmp_e2e_{tmp_path_factory.mktemp('e2e').name}" + workdir.mkdir(parents=True, exist_ok=True) + # Prepare subdirs... + return workdir + + def test_full_pipeline_english(self, project_root: Path, e2e_workdir: Path) -> None: + """Verifies full 9-step pipeline works end-to-end for English.""" + # Run pipeline using 'uv run viper' in project_root... + pass +``` - Raises - ------ - AssertionError - If pipeline exit code is non-zero or PDF count incorrect +### Feature Toggle Testing (Config Overrides) - Assertion: Pipeline succeeds and generates correct number of PDFs - """ - input_dir = project_root / "input" - output_dir = project_root / "output" - - input_file = input_dir / "e2e_test_clients.xlsx" - # Create test Excel file... - - # Run pipeline with project_root as CWD (not tmp_path) - result = subprocess.run( - ["uv", "run", "viper", input_file.name, "en"], - cwd=str(project_root), - capture_output=True, - text=True - ) - - assert result.returncode == 0 - pdfs = list((output_dir / "pdf_individual").glob("*.pdf")) - assert len(pdfs) == 3 -``` +While E2E tests are reserved for full smoke paths, feature toggles (e.g., QR enabled/disabled) should primarily be tested at the **Integration** level. This avoids the overhead of full PDF compilation for every configuration variant. -### Configuration Override Pattern for Feature Testing +**Preferred Pattern:** +In `tests/integration/test_pipeline_contracts.py`, use a modular approach to verify that configuration flags correctly skip or include specific step logic. -**Solution:** ```python -import yaml -from pathlib import Path - -@pytest.mark.e2e -def test_pipeline_with_qr_disabled(project_root: Path) -> None: - """E2E: QR code generation can be disabled via config. +@pytest.mark.integration +def test_pipeline_skips_qr_when_disabled(tmp_path: Path): + """Integration: verify that 'qr.enabled=False' skips Step 3. Real-world significance: - - Verifies feature flags in config actually control pipeline behavior - - Tests that disabled QR generation doesn't crash pipeline - - Ensures config-driven behavior is deterministic and testable - - Parameters - ---------- - project_root : Path - Fixture providing absolute path to project root - - Raises - ------ - AssertionError - If QR code generation is not skipped when disabled - - Notes - ----- - Always restores original config in finally block to prevent test pollution. + - Feature flags must be deterministic and respected + - Avoids unnecessary processing time when features aren't needed """ - config_path = project_root / "config" / "parameters.yaml" - - # Load original config - with open(config_path) as f: - original_config = yaml.safe_load(f) - - try: - # Modify config - original_config["qr"]["enabled"] = False - with open(config_path, "w") as f: - yaml.dump(original_config, f) - - # Run pipeline - result = subprocess.run( - ["uv", "run", "viper", "test_input.xlsx", "en"], - cwd=str(project_root), - capture_output=True, - text=True - ) - - # Verify QR generation was skipped - assert result.returncode == 0 - assert "Step 3: Generating QR codes" not in result.stdout - qr_dir = project_root / "output" / "artifacts" / "qr_codes" - assert not qr_dir.exists() or len(list(qr_dir.glob("*.png"))) == 0 - - finally: - # Restore original config - original_config["qr"]["enabled"] = True - with open(config_path, "w") as f: - yaml.dump(original_config, f) + # Logic to verify step bypass or artifact omission ``` ### Input/Output Fixture Pattern @@ -633,14 +577,14 @@ def test_preprocess_sorts_clients_deterministically(): ## Test Coverage Goals -- **scripts/**: >80% code coverage -- **Pipeline orchestration**: >60% coverage (harder to test due to I/O) +- **pipeline/**: >80% code coverage +- **Pipeline orchestration**: >60% coverage (verified via combination of unit and integration tests) - **Critical path (Steps 1–6)**: >90% coverage - **Optional features (Steps 7–9)**: >70% coverage Run coverage reports with: ```bash -uv run pytest --cov=scripts --cov-report=html +uv run pytest --cov=pipeline --cov-report=html ``` View results in `htmlcov/index.html`. \ No newline at end of file diff --git a/plans/002-strategic-testing-refinement.md b/plans/002-strategic-testing-refinement.md index 17724a9..8a940dd 100644 --- a/plans/002-strategic-testing-refinement.md +++ b/plans/002-strategic-testing-refinement.md @@ -85,13 +85,24 @@ Run subagents with explicit scope, producing structured findings for each test f - Review [htmlcov/index.html](htmlcov/index.html) 3. For each test file, note which source lines are exclusively covered by that test. Keep tests that uniquely cover important contracts. +## Status: Completed (January 20, 2026) + +The strategic testing refinement is complete. The test suite has been streamlined as follows: +- **Unit suite reduced:** Merged redundant config, template loading, and language validation tests. Removed `test_enums.py`. Trimmed `test_data_models.py`. +- **Integration suite consolidated:** Created `test_pipeline_contracts.py` as the canonical handoff check. Removed redundant config-behavior checks. +- **Translation logic moved:** Pure unit-level translation/normalization tests relocated to the unit suite. +- **E2E preserved:** High-level smoke tests for English and French are retained. + +**Suite Metrics:** +- Total tests: 443 (previously 513) +- Passing: 100% + ## Decision criteria matrix Here is the assessment of the current test suite: | Test file | Category | Contract(s) | Source link(s) | Cost | Duplicates? | Keep/Trim/Replace | Rationale | |---|---|---|---|---|---|---|---| -| `tests/unit/test_config_loader.py` | Unit | Config loading/env vars | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | Yes | Merge | Merge into `test_config_validation.py` for unified config lifecycle. | -| `tests/unit/test_config_validation.py` | Unit | Schema/business rules | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | Partial | Keep | Centralized validation is critical for "contract over defensiveness". | +| `tests/unit/test_config_validation.py` | Unit | Config loading & validation | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | No | Keep | Consolidated config lifecycle: loading, schema, and business rules. | | `tests/unit/test_enums.py` | Unit | Enum integrity | [pipeline/enums.py](pipeline/enums.py) | Fast | Yes | Remove | Redundant with static analysis (Ruff/Mypy). | | `tests/unit/test_data_models.py` | Unit | Dataclass integrity | [pipeline/data_models.py](pipeline/data_models.py) | Fast | Yes | Trim | Remove generic frozen/field checks; keep custom methods. | | `tests/unit/test_preprocess.py` | Unit | Normalization/Sorting | [pipeline/preprocess.py](pipeline/preprocess.py) | Mod | No | Keep | Core business logic with high complexity. | diff --git a/tests/e2e/test_full_pipeline.py b/tests/e2e/test_full_pipeline.py index 89cfcde..89ce53a 100644 --- a/tests/e2e/test_full_pipeline.py +++ b/tests/e2e/test_full_pipeline.py @@ -22,9 +22,8 @@ from __future__ import annotations -import json +import shutil import subprocess -from collections.abc import Generator from pathlib import Path import pytest @@ -43,45 +42,51 @@ def project_root(self) -> Path: return Path(__file__).resolve().parent.parent.parent @pytest.fixture - def pipeline_input_file(self, project_root: Path) -> Generator[Path, None, None]: - """Create a test input Excel file in the project input directory.""" - input_file = project_root / "input" / "e2e_test_clients.xlsx" - df = create_test_input_dataframe(num_clients=3) - df.to_excel(input_file, index=False, engine="openpyxl") + def e2e_workdir( + self, project_root: Path, tmp_path_factory: pytest.TempPathFactory + ) -> Path: + """Create a temporary working directory within the project root. + + Typst requires absolute paths relative to the project root for asset + resolution (QR codes, logos). This workdir is inside project root + to satisfy that constraint while maintaining isolation. + """ + workdir = project_root / f"tmp_e2e_{tmp_path_factory.mktemp('e2e').name}" + workdir.mkdir(parents=True, exist_ok=True) + (workdir / "input").mkdir(exist_ok=True) + (workdir / "output").mkdir(exist_ok=True) - yield input_file + # Copy base config to workdir + config_dir = workdir / "config" + shutil.copytree(project_root / "config", config_dir) + + yield workdir # Cleanup - if input_file.exists(): - input_file.unlink() + if workdir.exists(): + shutil.rmtree(workdir) + + @pytest.fixture + def pipeline_input_file(self, e2e_workdir: Path) -> Path: + """Create a test input Excel file in the E2E workdir.""" + input_file = e2e_workdir / "input" / "e2e_test_clients.xlsx" + df = create_test_input_dataframe(num_clients=3) + df.to_excel(input_file, index=False, engine="openpyxl") + return input_file def run_pipeline( self, input_file: Path, language: str, project_root: Path, + e2e_workdir: Path, config_overrides: dict | None = None, ) -> subprocess.CompletedProcess: - """Run the viper pipeline via subprocess. - - Parameters - ---------- - input_file : Path - Path to input Excel file - language : str - Language code ('en' or 'fr') - project_root : Path - Project root (used for output directory within project tree) - config_overrides : dict, optional - Config parameters to override before running pipeline + """Run the viper pipeline via subprocess using isolated config/output.""" + config_dir = e2e_workdir / "config" - Returns - ------- - subprocess.CompletedProcess - Result of pipeline execution - """ if config_overrides: - config_path = project_root / "config" / "parameters.yaml" + config_path = config_dir / "parameters.yaml" with open(config_path) as f: config = yaml.safe_load(f) @@ -107,6 +112,10 @@ def run_pipeline( language, "--input", str(input_file.parent), + "--output", + str(e2e_workdir / "output"), + "--config", + str(config_dir), ] result = subprocess.run( @@ -115,26 +124,20 @@ def run_pipeline( return result def test_full_pipeline_english( - self, tmp_path: Path, pipeline_input_file: Path, project_root: Path + self, pipeline_input_file: Path, project_root: Path, e2e_workdir: Path ) -> None: - """Test complete pipeline execution with English language. - - Real-world significance: - - Core pipeline functionality must work for English input - - Verifies all 9 steps execute successfully - - Checks that per-client PDFs are created - """ - # Disable encryption for core E2E test (tests basic functionality) + """Test complete pipeline execution with English language.""" + # Disable encryption for core E2E test config_overrides = {"encryption": {"enabled": False}} result = self.run_pipeline( - pipeline_input_file, "en", project_root, config_overrides + pipeline_input_file, "en", project_root, e2e_workdir, config_overrides ) assert result.returncode == 0, f"Pipeline failed: {result.stderr}" assert "Pipeline completed successfully" in result.stdout - # Verify output structure (in project output directory) - output_dir = project_root / "output" + # Verify output structure in E2E workdir + output_dir = e2e_workdir / "output" assert (output_dir / "artifacts").exists() assert (output_dir / "pdf_individual").exists() @@ -143,225 +146,23 @@ def test_full_pipeline_english( assert len(pdfs) == 3, f"Expected 3 PDFs but found {len(pdfs)}" def test_full_pipeline_french( - self, tmp_path: Path, pipeline_input_file: Path, project_root: Path + self, pipeline_input_file: Path, project_root: Path, e2e_workdir: Path ) -> None: - """Test complete pipeline execution with French language. - - Real-world significance: - - Multilingual support must work for French input - - Templates, notices, and metadata must be in French - - Verifies language parameter is respected throughout pipeline - """ - # Disable encryption for core E2E test (tests basic functionality) + """Test complete pipeline execution with French language.""" + # Disable encryption for core E2E test config_overrides = {"encryption": {"enabled": False}} result = self.run_pipeline( - pipeline_input_file, "fr", project_root, config_overrides + pipeline_input_file, "fr", project_root, e2e_workdir, config_overrides ) assert result.returncode == 0, f"Pipeline failed: {result.stderr}" assert "Pipeline completed successfully" in result.stdout - # Verify output structure (in project output directory) - output_dir = project_root / "output" + # Verify output structure in E2E workdir + output_dir = e2e_workdir / "output" assert (output_dir / "artifacts").exists() assert (output_dir / "pdf_individual").exists() # Verify PDFs exist with French prefix pdfs = list((output_dir / "pdf_individual").glob("fr_notice_*.pdf")) assert len(pdfs) == 3, f"Expected 3 French PDFs but found {len(pdfs)}" - - def test_pipeline_with_qr_disabled( - self, tmp_path: Path, pipeline_input_file: Path, project_root: Path - ) -> None: - """Test pipeline with QR code generation disabled. - - Real-world significance: - - QR codes are optional (controlled by config) - - Pipeline must skip QR generation when disabled - - Should complete faster without QR generation - """ - # Disable both QR and encryption for this test - config_overrides = { - "qr": {"enabled": False}, - "encryption": {"enabled": False}, - } - result = self.run_pipeline( - pipeline_input_file, "en", project_root, config_overrides - ) - - assert result.returncode == 0, f"Pipeline failed: {result.stderr}" - assert "Step 3: Generating QR codes" in result.stdout - assert "disabled" in result.stdout.lower() or "skipped" in result.stdout.lower() - - # Verify PDFs still exist - output_dir = project_root / "output" - pdfs = list((output_dir / "pdf_individual").glob("en_notice_*.pdf")) - assert len(pdfs) == 3 - - def test_pipeline_with_encryption( - self, tmp_path: Path, pipeline_input_file: Path, project_root: Path - ) -> None: - """Test pipeline with PDF encryption enabled. - - Real-world significance: - - Encryption protects sensitive student data in PDFs - - Each PDF is encrypted with a unique password based on client data - - Encrypted versions are created alongside original PDFs - """ - # Enable encryption for this specific test - config_overrides = {"encryption": {"enabled": True}} - result = self.run_pipeline( - pipeline_input_file, "en", project_root, config_overrides - ) - - assert result.returncode == 0, f"Pipeline failed: {result.stderr}" - assert "Encryption" in result.stdout - assert "success: 3" in result.stdout - - # Verify both encrypted and non-encrypted PDFs exist - output_dir = project_root / "output" - encrypted_pdfs = list( - (output_dir / "pdf_individual").glob("en_notice_*_encrypted.pdf") - ) - assert len(encrypted_pdfs) == 3, ( - f"Expected 3 encrypted PDFs but found {len(encrypted_pdfs)}" - ) - - # Non-encrypted versions should also exist (not removed by default) - all_pdfs = list((output_dir / "pdf_individual").glob("en_notice_*.pdf")) - assert len(all_pdfs) == 6, ( - f"Expected 6 total PDFs (3 encrypted + 3 non-encrypted) but found {len(all_pdfs)}" - ) - - def test_pipeline_with_batching( - self, tmp_path: Path, pipeline_input_file: Path, project_root: Path - ) -> None: - """Test pipeline with PDF bundling enabled. - - Real-world significance: - - Bundling groups individual PDFs into combined files - - Useful for organizing output by school or size - - Creates manifests for audit trails - """ - # Temporarily enable bundling in config - config_path = project_root / "config" / "parameters.yaml" - with open(config_path) as f: - config = yaml.safe_load(f) - original_bundle_size = config.get("bundling", {}).get("bundle_size") - original_encryption = config.get("encryption", {}).get("enabled") - - try: - # Disable encryption and enable bundling - config["encryption"]["enabled"] = False - config["bundling"]["bundle_size"] = 2 - with open(config_path, "w") as f: - yaml.dump(config, f) - - result = self.run_pipeline(pipeline_input_file, "en", project_root) - - assert result.returncode == 0, f"Pipeline failed: {result.stderr}" - assert "Bundling" in result.stdout - assert ( - "created" in result.stdout.lower() or "bundle" in result.stdout.lower() - ) - - # Verify bundled PDFs exist - output_dir = project_root / "output" - assert (output_dir / "pdf_combined").exists() - bundles = list((output_dir / "pdf_combined").glob("en_bundle_*.pdf")) - assert len(bundles) > 0, "Expected bundled PDFs to be created" - - # Verify manifests exist - assert (output_dir / "metadata").exists() - manifests = list((output_dir / "metadata").glob("*_manifest.json")) - assert len(manifests) == len(bundles) - finally: - # Restore original config - config["bundling"]["bundle_size"] = original_bundle_size - config["encryption"]["enabled"] = original_encryption - with open(config_path, "w") as f: - yaml.dump(config, f) - - def test_pipeline_minimal_input(self, tmp_path: Path, project_root: Path) -> None: - """Test pipeline with minimal input (1 client). - - Real-world significance: - - Pipeline must handle edge case of single client - - Single-client PDFs must work correctly - - Minimal input helps debug issues - """ - # Create minimal input file with 1 client in project input dir - input_file = project_root / "input" / "e2e_minimal_input.xlsx" - df = create_test_input_dataframe(num_clients=1) - df.to_excel(input_file, index=False, engine="openpyxl") - - try: - # Disable encryption for this test - config_overrides = {"encryption": {"enabled": False}} - result = self.run_pipeline(input_file, "en", project_root, config_overrides) - - assert result.returncode == 0, f"Pipeline failed: {result.stderr}" - assert "Pipeline completed successfully" in result.stdout - - # Verify single PDF was created - output_dir = project_root / "output" - pdfs = list((output_dir / "pdf_individual").glob("en_notice_*.pdf")) - assert len(pdfs) == 1 - finally: - # Cleanup input file - if input_file.exists(): - input_file.unlink() - - def test_pipeline_validates_output_artifacts( - self, tmp_path: Path, pipeline_input_file: Path, project_root: Path - ) -> None: - """Test that pipeline creates valid output artifacts. - - Real-world significance: - - Pipeline produces JSON artifacts that are read by other steps - - Artifacts must have correct schema (format, required fields) - - JSON corruption would cause silent failures in downstream steps - """ - # Disable encryption for this test - config_overrides = {"encryption": {"enabled": False}} - result = self.run_pipeline( - pipeline_input_file, "en", project_root, config_overrides - ) - - assert result.returncode == 0 - - # Find and validate the preprocessed artifact - output_dir = project_root / "output" - artifacts = list((output_dir / "artifacts").glob("preprocessed_clients_*.json")) - assert len(artifacts) >= 1, "Expected at least 1 preprocessed artifact" - - artifact = artifacts[0] - with open(artifact) as f: - data = json.load(f) - - # Validate artifact structure - assert "run_id" in data - assert "language" in data - assert data["language"] == "en" - assert "clients" in data - assert len(data["clients"]) == 3 - assert "warnings" in data - - # Validate each client record - for client in data["clients"]: - assert "sequence" in client - assert "client_id" in client - assert "person" in client - assert "school" in client - assert "board" in client - assert "contact" in client - assert "vaccines_due" in client - - def test_placeholder_e2e_marker_applied(self) -> None: - """Placeholder test ensuring e2e marker is recognized by pytest. - - Real-world significance: - - E2E tests are marked so they can be run separately - - Can run only E2E tests with: uv run pytest -m e2e - """ - assert True diff --git a/tests/integration/test_artifact_schema.py b/tests/integration/test_artifact_schema.py deleted file mode 100644 index 05bedab..0000000 --- a/tests/integration/test_artifact_schema.py +++ /dev/null @@ -1,141 +0,0 @@ -"""Integration tests for artifact schema consistency across pipeline steps. - -Tests cover: -- PreprocessResult schema validation -- Artifact JSON structure consistency -- ClientRecord data preservation through steps -- Metadata flow and accumulation - -Real-world significance: -- Pipeline steps communicate via JSON artifacts with defined schemas -- Schema consistency is required for multi-step data flow -- Breaking schema changes cause silent data loss -- Artifacts must be shareable between different runs/environments -""" - -from __future__ import annotations - -import json -from pathlib import Path - -import pytest - -from pipeline import data_models -from tests.fixtures import sample_input - - -@pytest.mark.integration -class TestArtifactSchema: - """Integration tests for artifact schema consistency.""" - - def test_preprocess_result_serializable_to_json(self) -> None: - """Verify PreprocessResult can be serialized to JSON. - - Real-world significance: - - Artifacts are stored as JSON files in output/artifacts/ - - Must be JSON-serializable to persist between steps - """ - result = sample_input.create_test_preprocess_result(num_clients=2) - - # Should be convertible to dict - payload = data_models.ArtifactPayload( - run_id="test_001", - language=result.clients[0].language, - clients=result.clients, - warnings=result.warnings, - created_at="2025-01-01T00:00:00Z", - total_clients=len(result.clients), - ) - - assert payload.run_id == "test_001" - assert len(payload.clients) == 2 - - def test_artifact_payload_round_trip(self, tmp_path: Path) -> None: - """Verify ArtifactPayload can be written and read from JSON. - - Real-world significance: - - Artifacts must be persistent across pipeline runs - - Must survive round-trip serialization without data loss - """ - original = sample_input.create_test_artifact_payload( - num_clients=3, run_id="test_001" - ) - - # Write artifact - artifact_path = sample_input.write_test_artifact(original, tmp_path) - - # Read artifact - assert artifact_path.exists() - with open(artifact_path) as f: - artifact_data = json.load(f) - - # Verify key fields preserved - assert artifact_data["run_id"] == "test_001" - assert len(artifact_data["clients"]) == 3 - assert artifact_data["total_clients"] == 3 - - def test_client_record_fields_preserved_in_artifact(self, tmp_path: Path) -> None: - """Verify all ClientRecord fields are preserved in artifact JSON. - - Real-world significance: - - Downstream steps depend on specific fields being present - - Missing fields cause pipeline crashes or silent errors - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=1, - run_id="test_001", - ) - - artifact_path = sample_input.write_test_artifact(artifact, tmp_path) - - with open(artifact_path) as f: - artifact_data = json.load(f) - - client_dict = artifact_data["clients"][0] - - # Verify critical fields present - required_fields = [ - "sequence", - "client_id", - "language", - "person", - "school", - "board", - "contact", - "vaccines_due", - ] - - for field in required_fields: - assert field in client_dict, f"Missing critical field: {field}" - - def test_multiple_languages_in_artifact(self, tmp_path: Path) -> None: - """Verify artifacts support both English and French clients. - - Real-world significance: - - Pipeline must support bilingual operation - - Artifacts may contain mixed-language client data - """ - en_artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="en", run_id="test_en" - ) - fr_artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="fr", run_id="test_fr" - ) - - # Both should write successfully - en_path = sample_input.write_test_artifact(en_artifact, tmp_path) - fr_path = sample_input.write_test_artifact(fr_artifact, tmp_path) - - assert en_path.exists() - assert fr_path.exists() - - # Verify language is preserved - with open(en_path) as f: - en_data = json.load(f) - with open(fr_path) as f: - fr_data = json.load(f) - - assert en_data["language"] == "en" - assert fr_data["language"] == "fr" - assert en_data["clients"][0]["language"] == "en" - assert fr_data["clients"][0]["language"] == "fr" diff --git a/tests/integration/test_artifact_schema_flow.py b/tests/integration/test_artifact_schema_flow.py deleted file mode 100644 index bb86839..0000000 --- a/tests/integration/test_artifact_schema_flow.py +++ /dev/null @@ -1,357 +0,0 @@ -"""Integration tests for artifact schema consistency across pipeline steps. - -Tests cover multi-step artifact contracts: -- Preprocess output → QR generation input validation -- QR generation output file structure validation -- Notice generation input validation from preprocessed artifact -- Typst template structure validation -- QR payload generation and validation - -Real-world significance: -- Pipeline steps communicate via JSON artifacts with defined schemas -- Schema consistency is required for multi-step data flow -- Missing or malformed data causes silent pipeline failure -- Artifacts must preserve all critical fields through processing -""" - -from __future__ import annotations - -import json -from pathlib import Path -from typing import Any, Dict - -import pytest - -from pipeline import data_models -from tests.fixtures import sample_input - - -@pytest.mark.integration -class TestPreprocessToQrArtifactContract: - """Integration tests for preprocess output → QR generation contract.""" - - def test_preprocess_artifact_readable_by_qr_generation( - self, tmp_test_dir: Path, config_file: Path - ) -> None: - """Verify preprocessed artifact has all fields required by QR generation. - - Real-world significance: - - QR generation Step 3 depends on artifact schema from Step 2 - - Missing fields cause QR generation to crash silently or produce invalid data - - Must preserve client_id, person data, contact, school info - """ - # Create preprocessed artifact - artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="en", run_id="test_qr_001" - ) - artifact_dir = tmp_test_dir / "artifacts" - artifact_dir.mkdir(exist_ok=True) - - artifact_path = sample_input.write_test_artifact(artifact, artifact_dir) - - # Load artifact as QR generation would - with open(artifact_path) as f: - loaded = json.load(f) - - # Verify all required fields for QR payload template - for client in loaded["clients"]: - assert "client_id" in client - assert "person" in client - assert "school" in client - assert "contact" in client - assert client["person"]["date_of_birth_iso"] # Required for QR templates - - def test_qr_payload_template_placeholders_in_artifact( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify artifact data supports all QR payload template placeholders. - - Real-world significance: - - QR template may use any of: client_id, name, date_of_birth_iso, school, city, etc. - - Artifact must provide all fields that template references - - Missing field causes QR payload generation to fail - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=1, language="en", run_id="test_qr_payload_001" - ) - - client = artifact.clients[0] - - # These come from person dict - assert client.person["date_of_birth_iso"] - assert client.person["first_name"] - assert client.person["last_name"] - - # These come from school/board/contact - assert client.school["name"] - assert client.contact["city"] - assert client.contact["postal_code"] - assert client.contact["province"] - assert client.contact["street"] # street_address - - def test_artifact_client_sequence_preserved(self, tmp_test_dir: Path) -> None: - """Verify client sequence numbers are deterministic and preserved. - - Real-world significance: - - Sequence numbers (00001, 00002, ...) determine PDF filename - - Must be consistent for reproducible batching - - QR generation uses sequence in filenames - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=5, language="en", run_id="test_seq_001" - ) - artifact_dir = tmp_test_dir / "artifacts" - artifact_dir.mkdir() - - artifact_path = sample_input.write_test_artifact(artifact, artifact_dir) - - with open(artifact_path) as f: - loaded = json.load(f) - - # Sequences should be ordered 00001, 00002, etc. - sequences = [c["sequence"] for c in loaded["clients"]] - assert sequences == ["00001", "00002", "00003", "00004", "00005"] - - def test_multilingual_artifact_preserves_language_in_clients( - self, tmp_test_dir: Path - ) -> None: - """Verify language is preserved in both artifact and individual clients. - - Real-world significance: - - QR generation and notice generation need language to format dates - - Downstream steps must know language to select proper templates - - Mixed-language artifacts not supported; all clients same language - """ - en_artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="en", run_id="test_lang_en" - ) - fr_artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="fr", run_id="test_lang_fr" - ) - - artifact_dir = tmp_test_dir / "artifacts" - artifact_dir.mkdir() - - en_path = sample_input.write_test_artifact(en_artifact, artifact_dir) - fr_path = sample_input.write_test_artifact(fr_artifact, artifact_dir) - - with open(en_path) as f: - en_data = json.load(f) - with open(fr_path) as f: - fr_data = json.load(f) - - # Artifact top-level language - assert en_data["language"] == "en" - assert fr_data["language"] == "fr" - - # Per-client language - for client in en_data["clients"]: - assert client["language"] == "en" - for client in fr_data["clients"]: - assert client["language"] == "fr" - - -@pytest.mark.integration -class TestNoticeToCompileArtifactContract: - """Integration tests for notice generation → compilation contract.""" - - def test_notice_generation_input_schema_from_artifact( - self, tmp_test_dir: Path - ) -> None: - """Verify artifact schema supports notice generation requirements. - - Real-world significance: - - Notice generation Step 4 reads preprocessed artifact - - Templates need: client name, DOB, vaccines_due, school, contact info - - Missing fields cause template rendering to fail - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=1, language="en", run_id="test_notice_001" - ) - - client = artifact.clients[0] - - # Notice generation needs these fields for template rendering - assert client.person["first_name"] - assert client.person["last_name"] - assert client.person["date_of_birth_display"] - assert client.vaccines_due # List of diseases needing immunization - assert client.vaccines_due_list # Expanded list - assert client.school["name"] - assert client.contact["city"] - - def test_typst_file_generation_metadata_from_artifact( - self, tmp_test_dir: Path - ) -> None: - """Verify all metadata needed for Typst file generation is in artifact. - - Real-world significance: - - Typst templates (.typ files) reference QR image files by name - - Names are derived from sequence number and client_id - - Typst compilation fails if QR file not found with expected name - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="en", run_id="test_typst_001" - ) - - for i, client in enumerate(artifact.clients, 1): - # These fields determine QR filename: {sequence}_{client_id}.png - assert client.sequence == f"{i:05d}" - assert client.client_id - # QR dict (if present) should have filename - # In real pipeline, set during QR generation step - if client.qr: - assert "filename" in client.qr - - def test_vaccines_due_list_for_notice_rendering(self, tmp_test_dir: Path) -> None: - """Verify vaccines_due_list is populated for notice template iteration. - - Real-world significance: - - Notices display a chart showing which vaccines are due - - Template iterates over vaccines_due_list to build chart rows - - Missing vaccines_due_list causes chart to be empty/broken - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=1, language="en", run_id="test_vax_001" - ) - - client = artifact.clients[0] - - # Should have both string and list representation - assert client.vaccines_due # e.g., "Measles/Mumps/Rubella" - assert client.vaccines_due_list # e.g., ["Measles", "Mumps", "Rubella"] - assert isinstance(client.vaccines_due_list, list) - assert len(client.vaccines_due_list) > 0 - - -@pytest.mark.integration -class TestQrPayloadGeneration: - """Integration tests for QR payload template variable substitution.""" - - def test_qr_payload_template_variable_substitution( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify QR payload templates correctly substitute artifact variables. - - Real-world significance: - - QR template (from config) may use placeholders like {client_id}, {name} - - Variables must be correctly extracted from artifact and substituted - - Typos or missing variables cause invalid QR payloads - """ - config_qr_template = "https://example.com/v?id={client_id}&name={first_name}" - - client = sample_input.create_test_client_record( - sequence="00001", - client_id="C12345", - first_name="Alice", - language="en", - ) - - # Simulate variable extraction - template_vars = { - "client_id": client.client_id, - "first_name": client.person["first_name"], - "name": f"{client.person['first_name']} {client.person['last_name']}", - "language_code": client.language, - } - - payload = config_qr_template.format(**template_vars) - - assert "id=C12345" in payload - assert "name=Alice" in payload - - def test_qr_payload_iso_date_format( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify QR payloads use ISO date format (YYYY-MM-DD). - - Real-world significance: - - QR payloads should be URL-safe and parseable by receiving system - - ISO date format (2015-06-15) is unambiguous vs regional formats - - Used in many backend systems for DOB verification - """ - config_qr_template = ( - "https://example.com/update?client_id={client_id}&dob={date_of_birth_iso}" - ) - - client = sample_input.create_test_client_record( - client_id="C99999", - date_of_birth="2015-06-15", - language="en", - ) - - template_vars = { - "client_id": client.client_id, - "date_of_birth_iso": client.person["date_of_birth_iso"], - } - - payload = config_qr_template.format(**template_vars) - - assert "dob=2015-06-15" in payload - assert "dob=" + "2015-06-15" in payload # Verify exact format - - -@pytest.mark.integration -class TestArtifactMetadataPreservation: - """Integration tests for artifact metadata flow through steps.""" - - def test_artifact_metadata_preserved_through_json_serialization( - self, tmp_test_dir: Path - ) -> None: - """Verify artifact metadata (run_id, warnings, created_at) survives JSON round-trip. - - Real-world significance: - - Metadata enables linking pipeline runs for debugging - - Warnings track data quality issues - - created_at timestamp enables audit trail - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=2, language="en", run_id="test_meta_20250101_120000" - ) - artifact_dir = tmp_test_dir / "artifacts" - artifact_dir.mkdir() - - artifact_path = sample_input.write_test_artifact(artifact, artifact_dir) - - with open(artifact_path) as f: - loaded = json.load(f) - - assert loaded["run_id"] == "test_meta_20250101_120000" - assert "created_at" in loaded - assert loaded["total_clients"] == 2 - - def test_artifact_warnings_accumulated(self, tmp_test_dir: Path) -> None: - """Verify warnings are preserved in artifact for user visibility. - - Real-world significance: - - Preprocessing may encounter data quality issues (missing board, invalid postal) - - Warnings should be logged to artifact for user review - - Allows diagnosing why certain clients have incomplete data - """ - artifact = data_models.ArtifactPayload( - run_id="test_warn_001", - language="en", - clients=[ - sample_input.create_test_client_record( - sequence="00001", client_id="C00001", language="en" - ), - ], - warnings=[ - "Missing board name for client C00001", - "Invalid postal code format for client C00002", - ], - created_at="2025-01-01T12:00:00Z", - input_file="test_input.xlsx", - total_clients=1, - ) - - artifact_dir = tmp_test_dir / "artifacts" - artifact_dir.mkdir() - - artifact_path = sample_input.write_test_artifact(artifact, artifact_dir) - - with open(artifact_path) as f: - loaded = json.load(f) - - assert len(loaded["warnings"]) == 2 - assert "Missing board name" in loaded["warnings"][0] diff --git a/tests/integration/test_config_driven_behavior.py b/tests/integration/test_config_driven_behavior.py deleted file mode 100644 index 0a3755b..0000000 --- a/tests/integration/test_config_driven_behavior.py +++ /dev/null @@ -1,330 +0,0 @@ -"""Integration tests for configuration-driven pipeline behavior. - -Tests cover: -- Feature flags affect actual behavior (qr.enabled, encryption.enabled, bundling.enabled) -- Configuration options propagate through pipeline steps -- Invalid config values are caught and reported -- Default configuration allows pipeline to run -- Batching strategies (group_by school, board, or sequential) -- Cleanup configuration affects file removal behavior - -Real-world significance: -- Configuration controls optional features and pipeline behavior -- Must verify config actually changes behavior (not just stored) -- Users rely on configuration to enable/disable features -- Misconfigured pipeline may fail silently or unexpectedly -""" - -from __future__ import annotations - -from typing import Any, Dict - -import pytest - - -@pytest.mark.integration -class TestConfigDrivenBehavior: - """Integration tests for config controlling pipeline behavior.""" - - def test_qr_enabled_flag_exists_in_config( - self, default_config: Dict[str, Any] - ) -> None: - """Verify QR enabled flag is present in default config. - - Real-world significance: - - QR generation can be disabled to save processing time - - Config must have boolean flag to control this - """ - assert "qr" in default_config - assert "enabled" in default_config["qr"] - assert isinstance(default_config["qr"]["enabled"], bool) - - def test_encryption_enabled_flag_exists_in_config( - self, default_config: Dict[str, Any] - ) -> None: - """Verify encryption enabled flag is present in default config. - - Real-world significance: - - Encryption is optional for protecting sensitive data - - Config must allow enabling/disabling safely - """ - assert "encryption" in default_config - assert "enabled" in default_config["encryption"] - assert isinstance(default_config["encryption"]["enabled"], bool) - - def test_bundling_enabled_flag_exists_in_config( - self, default_config: Dict[str, Any] - ) -> None: - """Verify bundling configuration exists. - - Real-world significance: - - Batching groups PDFs for efficient distribution - - bundle_size controls whether bundling is active (0 = disabled) - """ - assert "bundling" in default_config - assert "bundle_size" in default_config["bundling"] - assert isinstance(default_config["bundling"]["bundle_size"], int) - - def test_pipeline_config_section_exists( - self, default_config: Dict[str, Any] - ) -> None: - """Verify pipeline section with lifecycle settings exists. - - Real-world significance: - - Pipeline lifecycle settings control cleanup at startup and shutdown - - before_run controls cleanup of old output before starting new run - - after_run controls cleanup of intermediate files after successful run - """ - assert "pipeline" in default_config - assert "before_run" in default_config["pipeline"] - assert "after_run" in default_config["pipeline"] - assert "clear_output_directory" in default_config["pipeline"]["before_run"] - assert "remove_artifacts" in default_config["pipeline"]["after_run"] - - def test_bundle_size_configuration(self, default_config: Dict[str, Any]) -> None: - """Verify batch size is configurable. - - Real-world significance: - - Users can control how many PDFs are grouped per batch - - Allows optimization for printing hardware - """ - assert "bundling" in default_config - assert "bundle_size" in default_config["bundling"] - assert isinstance(default_config["bundling"]["bundle_size"], int) - assert default_config["bundling"]["bundle_size"] >= 0 - - def test_chart_diseases_header_configuration( - self, default_config: Dict[str, Any] - ) -> None: - """Verify chart diseases header is configurable list. - - Real-world significance: - - Allows customizing which diseases appear on notice - - Different districts may have different disease tracking needs - """ - assert "chart_diseases_header" in default_config - assert isinstance(default_config["chart_diseases_header"], list) - assert len(default_config["chart_diseases_header"]) > 0 - - def test_replace_unspecified_configuration(self, default_config: Dict[str, Any]) -> None: - """Verify replace_unspecified list is configurable. - - Real-world significance: - - Some agents (staff) should not receive notices - - Config allows filtering out specific agent types - """ - assert "replace_unspecified" in default_config - assert isinstance(default_config["replace_unspecified"], list) - - -@pytest.mark.integration -class TestQrEnabledBehavior: - """Integration tests for QR enabled/disabled feature flag.""" - - def test_qr_enabled_true_config(self, default_config: Dict[str, Any]) -> None: - """Verify config can enable QR generation. - - Real-world significance: - - QR codes on notices enable online vaccine verification - - Must be able to enable/disable without code changes - """ - config_qr_enabled = default_config.copy() - config_qr_enabled["qr"]["enabled"] = True - - assert config_qr_enabled["qr"]["enabled"] is True - - def test_qr_enabled_false_config(self, default_config: Dict[str, Any]) -> None: - """Verify config can disable QR generation. - - Real-world significance: - - Some jurisdictions may not use QR codes - - Disabling QR saves processing time - """ - config_qr_disabled = default_config.copy() - config_qr_disabled["qr"]["enabled"] = False - - assert config_qr_disabled["qr"]["enabled"] is False - - def test_qr_payload_template_configured( - self, default_config: Dict[str, Any] - ) -> None: - """Verify QR payload template is configurable. - - Real-world significance: - - Different districts may use different QR backend systems - - Template should point to correct verification endpoint - """ - assert "payload_template" in default_config["qr"] - assert isinstance(default_config["qr"]["payload_template"], str) - assert len(default_config["qr"]["payload_template"]) > 0 - - -@pytest.mark.integration -class TestEncryptionBehavior: - """Integration tests for PDF encryption configuration.""" - - def test_encryption_enabled_true_config( - self, default_config: Dict[str, Any] - ) -> None: - """Verify config can enable PDF encryption. - - Real-world significance: - - Encrypting PDFs protects sensitive student health information - - Password derived from student data ensures privacy - """ - config_encrypted = default_config.copy() - config_encrypted["encryption"]["enabled"] = True - - assert config_encrypted["encryption"]["enabled"] is True - - def test_encryption_enabled_false_config( - self, default_config: Dict[str, Any] - ) -> None: - """Verify config can disable PDF encryption. - - Real-world significance: - - Some environments may use other protection mechanisms - - Disabling encryption simplifies distribution - """ - config_unencrypted = default_config.copy() - config_unencrypted["encryption"]["enabled"] = False - - assert config_unencrypted["encryption"]["enabled"] is False - - def test_encryption_password_template_configured( - self, default_config: Dict[str, Any] - ) -> None: - """Verify encryption password template is configurable. - - Real-world significance: - - Password can use student DOB, ID, or combination - - Template allows flexibility in password generation strategy - """ - assert "password" in default_config["encryption"] - assert "template" in default_config["encryption"]["password"] - assert isinstance(default_config["encryption"]["password"]["template"], str) - - -@pytest.mark.integration -class TestBatchingBehavior: - """Integration tests for PDF bundling configuration.""" - - def test_bundling_bundle_size_zero_disables_bundling( - self, default_config: Dict[str, Any] - ) -> None: - """Verify bundle_size=0 disables bundling. - - Real-world significance: - - When bundle_size=0, each student PDF remains individual - - No PDF combining step is executed - """ - config = default_config.copy() - config["bundling"]["bundle_size"] = 0 - - assert config["bundling"]["bundle_size"] == 0 - - def test_bundling_bundle_size_positive_enables_bundling( - self, default_config: Dict[str, Any] - ) -> None: - """Verify positive bundle_size enables bundling. - - Real-world significance: - - bundle_size=50 means 50 PDFs per combined batch - - Reduces distribution workload (fewer files to send) - """ - config = default_config.copy() - config["bundling"]["bundle_size"] = 50 - - assert config["bundling"]["bundle_size"] == 50 - assert config["bundling"]["bundle_size"] > 0 - - def test_bundling_group_by_sequential(self, default_config: Dict[str, Any]) -> None: - """Verify bundling can use sequential grouping. - - Real-world significance: - - Sequential bundling: PDFs combined in processing order - - Simplest bundling strategy - """ - config = default_config.copy() - config["bundling"]["group_by"] = None - - assert config["bundling"]["group_by"] is None - - def test_bundling_group_by_school(self, default_config: Dict[str, Any]) -> None: - """Verify bundling can group by school. - - Real-world significance: - - Group by school: Each batch contains only one school's students - - Allows per-school distribution to school boards - """ - config = default_config.copy() - config["bundling"]["group_by"] = "school" - - assert config["bundling"]["group_by"] == "school" - - def test_bundling_group_by_board(self, default_config: Dict[str, Any]) -> None: - """Verify bundling can group by school board. - - Real-world significance: - - Group by board: Each batch contains only one board's students - - Allows per-board distribution to parent organizations - """ - config = default_config.copy() - config["bundling"]["group_by"] = "board" - - assert config["bundling"]["group_by"] == "board" - - -@pytest.mark.integration -class TestPipelineCleanupBehavior: - """Integration tests for pipeline cleanup configuration.""" - - def test_keep_intermediate_files_true(self, default_config: Dict[str, Any]) -> None: - """Verify intermediate files can be preserved. - - Real-world significance: - - Keeping .typ files, JSON artifacts allows post-run debugging - - Useful for troubleshooting notice content issues - """ - config = default_config.copy() - config["pipeline"]["keep_intermediate_files"] = True - - assert config["pipeline"]["keep_intermediate_files"] is True - - def test_keep_intermediate_files_false( - self, default_config: Dict[str, Any] - ) -> None: - """Verify intermediate files can be removed. - - Real-world significance: - - Removes .typ, JSON, and per-client PDFs after bundling - - Cleans up disk space for large runs (1000+ students) - """ - config = default_config.copy() - config["pipeline"]["keep_intermediate_files"] = False - - assert config["pipeline"]["keep_intermediate_files"] is False - - def test_auto_remove_output_true(self, default_config: Dict[str, Any]) -> None: - """Verify auto-removal of previous output can be enabled. - - Real-world significance: - - auto_remove_output=true: Automatically delete previous run - - Ensures output directory contains only current run - """ - config = default_config.copy() - config["pipeline"]["auto_remove_output"] = True - - assert config["pipeline"]["auto_remove_output"] is True - - def test_auto_remove_output_false(self, default_config: Dict[str, Any]) -> None: - """Verify auto-removal of previous output can be disabled. - - Real-world significance: - - auto_remove_output=false: Preserve previous run; warn on conflicts - - Allows archiving or comparing multiple runs - """ - config = default_config.copy() - config["pipeline"]["auto_remove_output"] = False - - assert config["pipeline"]["auto_remove_output"] is False diff --git a/tests/integration/test_pipeline_contracts.py b/tests/integration/test_pipeline_contracts.py new file mode 100644 index 0000000..01dca2a --- /dev/null +++ b/tests/integration/test_pipeline_contracts.py @@ -0,0 +1,281 @@ +"""Integration tests for pipeline step contracts and artifact consistency. + +This module consolidates tests that verify the handoff between pipeline steps: +- Preprocess → QR Generation +- QR Generation → Notice Generation +- Notice Generation → Typst Compilation +- Compilation → PDF Validation/Bundling + +It ensures that artifact schemas are consistent, required fields are preserved, +and configuration propagates correctly across the multi-step workflow. +""" + +from __future__ import annotations + +import copy +import json +from pathlib import Path +from typing import Any, Dict + +import pytest + +from pipeline import data_models +from tests.fixtures import sample_input + + +@pytest.mark.integration +class TestArtifactContracts: + """Integration tests for artifact schema consistency and metadata preservation.""" + + def test_artifact_payload_round_trip(self, tmp_path: Path) -> None: + """Verify ArtifactPayload can be written and read from JSON. + + Real-world significance: + - Artifacts must survive round-trip serialization without data loss + - Steps communicate via these files on disk + """ + original = sample_input.create_test_artifact_payload( + num_clients=3, run_id="test_round_trip_001" + ) + + # Write artifact + artifact_path = sample_input.write_test_artifact(original, tmp_path) + + # Read artifact + assert artifact_path.exists() + with open(artifact_path) as f: + artifact_data = json.load(f) + + # Verify key fields preserved + assert artifact_data["run_id"] == "test_round_trip_001" + assert len(artifact_data["clients"]) == 3 + assert artifact_data["total_clients"] == 3 + assert "created_at" in artifact_data + + def test_client_record_fields_preserved_in_artifact(self, tmp_path: Path) -> None: + """Verify critical ClientRecord fields are preserved in artifact JSON. + + Real-world significance: + - Downstream steps depend on specific fields being present + - Missing fields cause pipeline crashes or silent errors + """ + artifact = sample_input.create_test_artifact_payload( + num_clients=1, + run_id="test_fields_001", + ) + + artifact_path = sample_input.write_test_artifact(artifact, tmp_path) + + with open(artifact_path) as f: + artifact_data = json.load(f) + + client_dict = artifact_data["clients"][0] + + # Verify critical fields present + required_fields = [ + "sequence", + "client_id", + "language", + "person", + "school", + "board", + "contact", + "vaccines_due", + "vaccines_due_list", + ] + + for field in required_fields: + assert field in client_dict, f"Missing critical field: {field}" + + def test_multilingual_artifact_support(self, tmp_path: Path) -> None: + """Verify artifacts support both English and French clients consistently. + + Real-world significance: + - Pipeline must support bilingual operation + - Artifacts must preserve language markers for template selection + """ + for lang in ["en", "fr"]: + artifact = sample_input.create_test_artifact_payload( + num_clients=2, language=lang, run_id=f"test_lang_{lang}" + ) + path = sample_input.write_test_artifact(artifact, tmp_path) + + with open(path) as f: + data = json.load(f) + + assert data["language"] == lang + for client in data["clients"]: + assert client["language"] == lang + + def test_artifact_warnings_accumulation(self, tmp_path: Path) -> None: + """Verify warnings are preserved in artifact for user visibility.""" + artifact = data_models.ArtifactPayload( + run_id="test_warn_001", + language="en", + clients=[ + sample_input.create_test_client_record(sequence="00001", language="en") + ], + warnings=["Missing board name", "Invalid postal code"], + created_at="2025-01-01T12:00:00Z", + total_clients=1, + ) + + artifact_path = sample_input.write_test_artifact(artifact, tmp_path) + + with open(artifact_path) as f: + loaded = json.load(f) + + assert len(loaded["warnings"]) == 2 + assert "Missing board name" in loaded["warnings"][0] + + +@pytest.mark.integration +class TestPreprocessToQrContract: + """Integration tests for Preprocess (Step 2) → QR Generation (Step 3) contract.""" + + def test_artifact_data_supports_qr_payload_generation( + self, tmp_test_dir: Path, default_config: Dict[str, Any] + ) -> None: + """Verify artifact has all data needed for QR payload substitution. + + Real-world significance: + - QR generation substitution depends on specific artifact fields + - Missing fields cause KR payload generation to fail + """ + artifact = sample_input.create_test_artifact_payload( + num_clients=1, language="en", run_id="test_qr_contract" + ) + client = artifact.clients[0] + + # Fields required by default QR payload templates + assert client.client_id + assert client.person["first_name"] + assert client.person["last_name"] + assert client.person["date_of_birth_iso"] + assert client.school["name"] + assert client.contact["city"] + + def test_client_sequence_stability_for_filenames(self, tmp_path: Path) -> None: + """Verify client sequence numbers are deterministic for filename generation. + + Real-world significance: + - Filenames (QR, Notice, PDF) use the sequence number (00001, 00002...) + - Consistency is critical for traceability and batching + """ + artifact = sample_input.create_test_artifact_payload( + num_clients=5, language="en", run_id="test_sequence" + ) + sequences = [c.sequence for c in artifact.clients] + assert sequences == ["00001", "00002", "00003", "00004", "00005"] + + +@pytest.mark.integration +class TestQrToNoticeContract: + """Integration tests for QR Generation (Step 3) → Notice Generation (Step 4) contract.""" + + def test_qr_reference_field_in_client_record(self) -> None: + """Verify ClientRecord can carry QR metadata to notice generation. + + Real-world significance: + - Notice templates need to know the QR filename to embed it + - QR step adds this info to the artifact + """ + import dataclasses + + client = sample_input.create_test_client_record( + sequence="00001", client_id="C123" + ) + client = dataclasses.replace( + client, + qr={ + "filename": "00001_C123.png", + "payload": "https://example.com/vax/C123", + }, + ) + + assert client.qr["filename"] == "00001_C123.png" + + def test_qr_payload_formatting_iso_date(self) -> None: + """Verify QR payloads correctly format ISO dates for receiving systems.""" + client = sample_input.create_test_client_record(date_of_birth="2015-06-15") + template = "dob={date_of_birth_iso}" + + payload = template.format(date_of_birth_iso=client.person["date_of_birth_iso"]) + assert payload == "dob=2015-06-15" + + +@pytest.mark.integration +class TestNoticeToCompileContract: + """Integration tests for Notice Generation (Step 4) → Typst Compilation (Step 5) contract.""" + + def test_vaccines_due_list_for_template_iteration(self) -> None: + """Verify vaccines_due_list is present and correct for chart rendering. + + Real-world significance: + - Notice templates iterate over this list to build the immunization chart + """ + client = sample_input.create_test_client_record( + vaccines_due="Measles/Mumps/Rubella", + vaccines_due_list=["Measles", "Mumps", "Rubella"], + ) + + assert isinstance(client.vaccines_due_list, list) + assert len(client.vaccines_due_list) == 3 + assert "Measles" in client.vaccines_due_list + + def test_typst_synthetic_file_structure(self, tmp_path: Path) -> None: + """Verify the content structure expected by the Typst compiler.""" + content = '#import "conf.typ": header\n#header()\n= Notice for {name}' + rendered = content.format(name="John Doe") + + assert "header()" in rendered + assert "John Doe" in rendered + + +@pytest.mark.integration +class TestDownstreamWorkflowContracts: + """Integration tests for Step 6+ handoffs and configuration propagation.""" + + def test_compilation_to_validation_manifest(self, tmp_path: Path) -> None: + """Verify structure of PDF validation manifest (Step 6).""" + manifest = { + "run_id": "test_run", + "page_counts": [{"sequence": "00001", "page_count": 1}], + } + path = tmp_path / "manifest.json" + with open(path, "w") as f: + json.dump(manifest, f) + + assert path.exists() + + def test_encryption_to_bundling_metadata(self) -> None: + """Verify encryption (Step 7) preserves fields for bundling (Step 8).""" + record = { + "client": {"school": "School A", "board": "Board B"}, + "password": "password123", + } + # Bundling needs school/board to group PDFs + assert record["client"]["school"] == "School A" + assert record["client"]["board"] == "Board B" + + def test_config_propagation_encryption_vs_bundling( + self, default_config: Dict[str, Any] + ) -> None: + """Verify configuration enforces mutually exclusive encryption and bundling.""" + config = copy.deepcopy(default_config) + + # Scenario: Encryption enabled + config["encryption"]["enabled"] = True + assert config["encryption"]["enabled"] is True + + # Scenario: Bundling enabled (usually requires encryption disabled) + config["encryption"]["enabled"] = False + config["bundling"]["enabled"] = True + assert config["encryption"]["enabled"] is False + assert config["bundling"]["enabled"] is True + + def test_cleanup_policy_configuration(self, default_config: Dict[str, Any]) -> None: + """Verify cleanup policy configuration is accessible.""" + assert "after_run" in default_config["pipeline"] + assert "remove_artifacts" in default_config["pipeline"]["after_run"] + assert "remove_unencrypted_pdfs" in default_config["pipeline"]["after_run"] diff --git a/tests/integration/test_pipeline_stages.py b/tests/integration/test_pipeline_stages.py deleted file mode 100644 index 94a93a5..0000000 --- a/tests/integration/test_pipeline_stages.py +++ /dev/null @@ -1,519 +0,0 @@ -"""Integration tests for multi-step pipeline workflows. - -Tests cover end-to-end interactions between adjacent steps: -- Preprocessing → QR generation (artifact validation) -- QR generation → Notice generation (QR references in templates) -- Notice generation → Typst compilation (template syntax) -- Compilation → PDF validation/counting (PDF integrity) -- PDF validation → Encryption (PDF metadata preservation) -- Encryption → Bundling (bundle manifest generation) - -Real-world significance: -- Multi-step workflows depend on contracts between adjacent steps -- A single missing field or changed format cascades failures -- Integration testing catches failures that unit tests miss -- Verifies configuration changes propagate through pipeline -""" - -from __future__ import annotations - -import copy -import json -from pathlib import Path -from typing import Any, Dict, List - -import pytest - -from pipeline import data_models -from tests.fixtures import sample_input - - -@pytest.mark.integration -class TestPreprocessToQrStepIntegration: - """Integration tests for Preprocess → QR generation workflow.""" - - def test_preprocess_output_suitable_for_qr_generation( - self, tmp_test_dir: Path - ) -> None: - """Verify preprocessed artifact has all data needed by QR generation step. - - Real-world significance: - - QR generation (Step 3) reads preprocessed artifact from Step 2 - - Must have: client_id, name, DOB, school, contact info for payload template - - Missing data causes QR payload generation to fail - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=3, language="en", run_id="test_preqr_001" - ) - artifact_dir = tmp_test_dir / "artifacts" - artifact_dir.mkdir() - - artifact_path = sample_input.write_test_artifact(artifact, artifact_dir) - - # Verify artifact is readable and has required fields - with open(artifact_path) as f: - loaded = json.load(f) - - assert len(loaded["clients"]) == 3 - - # Each client must have fields for QR payload template - for client_dict in loaded["clients"]: - assert "client_id" in client_dict - assert "person" in client_dict - assert client_dict["person"]["first_name"] - assert client_dict["person"]["last_name"] - assert client_dict["person"]["date_of_birth_iso"] - assert "school" in client_dict - assert "contact" in client_dict - - def test_client_sequence_ordered_for_qr_files(self, tmp_test_dir: Path) -> None: - """Verify client sequences are deterministic for QR filename generation. - - Real-world significance: - - QR files named: {sequence}_{client_id}.png - - Sequence numbers (00001, 00002, ...) must be stable - - Same input → same filenames across multiple runs - """ - clients = [ - sample_input.create_test_client_record( - sequence=f"{i + 1:05d}", - client_id=f"C{i:05d}", - language="en", - ) - for i in range(5) - ] - - artifact = data_models.ArtifactPayload( - run_id="test_seq_qr", - language="en", - clients=clients, - warnings=[], - created_at="2025-01-01T12:00:00Z", - total_clients=5, - ) - - # Verify sequences are in expected order - sequences = [c.sequence for c in artifact.clients] - assert sequences == ["00001", "00002", "00003", "00004", "00005"] - - def test_language_consistency_preprocess_to_qr(self, tmp_test_dir: Path) -> None: - """Verify language is preserved and consistent across steps. - - Real-world significance: - - QR generation may format dates differently per language - - Must know language to select correct template placeholders - - All clients in artifact must have same language - """ - for lang in ["en", "fr"]: - artifact = sample_input.create_test_artifact_payload( - num_clients=2, language=lang, run_id=f"test_lang_{lang}" - ) - - assert artifact.language == lang - for client in artifact.clients: - assert client.language == lang - - -@pytest.mark.integration -class TestQrToNoticeGenerationIntegration: - """Integration tests for QR generation → Notice generation workflow.""" - - def test_qr_payload_fits_template_variables( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify QR payload can be generated from artifact template. - - Real-world significance: - - Notice templates reference QR by filename and may embed payload - - Payload template may use: {client_id}, {name}, {date_of_birth_iso} - - Template validation ensures all placeholders exist in artifact - """ - client = sample_input.create_test_client_record( - sequence="00001", - client_id="C12345", - first_name="Alice", - last_name="Zephyr", - date_of_birth="2015-06-15", - language="en", - ) - - # Simulate template variable substitution from config - template = default_config["qr"]["payload_template"] - - # Create variable dict from client (as QR generation would) - template_vars = { - "client_id": client.client_id, - "first_name": client.person["first_name"], - "last_name": client.person["last_name"], - "name": " ".join( - filter(None, [client.person["first_name"], client.person["last_name"]]) - ).strip(), - "date_of_birth_iso": client.person["date_of_birth_iso"], - "school": client.school["name"], - "city": client.contact["city"], - "postal_code": client.contact["postal_code"], - "province": client.contact["province"], - "street_address": client.contact["street"], - "language_code": client.language, - } - - # Template should successfully format - try: - payload = template.format(**template_vars) - assert len(payload) > 0 - except KeyError as e: - pytest.fail(f"Template refers to missing field: {e}") - - def test_qr_filename_reference_in_artifact(self, tmp_test_dir: Path) -> None: - """Verify artifact can reference QR file generated in Step 3. - - Real-world significance: - - Notice templates (Step 4) embed: !image("00001_C12345.png") - - Filename must match what QR generation produces: {sequence}_{client_id}.png - - If QR step adds qr.filename to artifact, notice step can reference it - """ - client = sample_input.create_test_client_record( - sequence="00001", - client_id="C12345", - language="en", - ) - - # Simulate QR generation adding QR reference to client - client_with_qr = data_models.ClientRecord( - sequence=client.sequence, - client_id=client.client_id, - language=client.language, - person=client.person, - school=client.school, - board=client.board, - contact=client.contact, - vaccines_due=client.vaccines_due, - vaccines_due_list=client.vaccines_due_list, - received=client.received, - metadata=client.metadata, - qr={ - "filename": f"{client.sequence}_{client.client_id}.png", - "payload": "https://example.com/vac/C12345", - }, - ) - - # Notice generation can now reference the QR file - assert client_with_qr.qr is not None - assert client_with_qr.qr["filename"] == "00001_C12345.png" - - -@pytest.mark.integration -class TestNoticeToCompileIntegration: - """Integration tests for Notice generation → Typst compilation workflow.""" - - def test_notice_template_render_requires_artifact_fields( - self, tmp_test_dir: Path - ) -> None: - """Verify notice templates can access all required artifact fields. - - Real-world significance: - - Typst templates access: client.person, client.vaccines_due_list, school - - Missing fields cause template render errors - - Template syntax: client.person.first_name, client.vaccines_due_list - """ - client = sample_input.create_test_client_record( - first_name="Alice", - last_name="Zephyr", - date_of_birth="2015-06-15", - vaccines_due="Measles/Mumps/Rubella", - vaccines_due_list=["Measles", "Mumps", "Rubella"], - language="en", - ) - - # Simulate template variable access - template_vars = { - "client_first_name": client.person["first_name"], - "client_last_name": client.person["last_name"], - "client_full_name": " ".join( - filter(None, [client.person["first_name"], client.person["last_name"]]) - ).strip(), - "client_dob": client.person["date_of_birth_display"], - "school_name": client.school["name"], - "vaccines_list": client.vaccines_due_list, - } - - # All fields should be present - assert template_vars["client_first_name"] == "Alice" - assert template_vars["client_last_name"] == "Zephyr" - assert template_vars["vaccines_list"] is not None - assert len(template_vars["vaccines_list"]) == 3 - - def test_typst_file_structure_consistency(self, tmp_test_dir: Path) -> None: - """Verify .typ files can be structured for Typst compilation. - - Real-world significance: - - Typst compiler (Step 5) processes .typ files from Step 4 - - Files must have valid Typst syntax - - Files reference QR images by filename - """ - # Create mock .typ file content (simplified) - typ_content = """#import "conf.typ": header, footer - -#set page( - margin: (top: 1cm, bottom: 1cm, left: 1cm, right: 1cm), -) - -#header() -= Immunization Notice for Alice Zephyr - -Client: Alice Zephyr -DOB: 2015-06-15 - -#image("artifacts/qr_codes/00001_C00001.png") - -#footer() -""" - - typ_file = tmp_test_dir / "00001_C00001.typ" - typ_file.write_text(typ_content) - - # Verify file is created and readable - assert typ_file.exists() - content = typ_file.read_text() - assert "Alice Zephyr" in content - assert "00001_C00001.png" in content - - -@pytest.mark.integration -class TestCompilationToPdfValidation: - """Integration tests for Typst compilation → PDF validation workflow.""" - - def test_pdf_page_count_validation_structure(self, tmp_test_dir: Path) -> None: - """Verify PDF validation can record page counts for compiled files. - - Real-world significance: - - Step 6 counts PDF pages for quality assurance - - Single-page PDFs indicate successful compilation - - Multi-page PDFs indicate template issues or client data problems - """ - # Create mock PDF records - pdf_records: List[data_models.PdfRecord] = [] - for i in range(1, 4): - record = data_models.PdfRecord( - sequence=f"{i:05d}", - client_id=f"C{i:05d}", - pdf_path=tmp_test_dir / f"{i:05d}_C{i:05d}.pdf", - page_count=1, - client={ - "first_name": f"Client{i}", - "last_name": "Student", - "school": "Test School", - }, - ) - pdf_records.append(record) - - # Verify page count structure - assert len(pdf_records) == 3 - for record in pdf_records: - assert record.page_count == 1 - assert record.sequence - assert record.client_id - - def test_pdf_validation_manifest_generation(self, tmp_test_dir: Path) -> None: - """Verify PDF validation can create manifest of page counts. - - Real-world significance: - - Manifest stored in output/metadata/_page_counts_.json - - Enables detecting incomplete compilations - - Useful for auditing and quality control - """ - manifest = { - "run_id": "test_compile_001", - "language": "en", - "created_at": "2025-01-01T12:00:00Z", - "total_pdfs": 3, - "page_counts": [ - { - "sequence": "00001", - "client_id": "C00001", - "page_count": 1, - }, - { - "sequence": "00002", - "client_id": "C00002", - "page_count": 1, - }, - { - "sequence": "00003", - "client_id": "C00003", - "page_count": 1, - }, - ], - "warnings": [], - } - - # Write manifest to metadata directory - metadata_dir = tmp_test_dir / "metadata" - metadata_dir.mkdir() - manifest_path = metadata_dir / "en_page_counts_test_compile_001.json" - - with open(manifest_path, "w") as f: - json.dump(manifest, f, indent=2) - - # Verify manifest can be read back - assert manifest_path.exists() - with open(manifest_path) as f: - loaded = json.load(f) - - assert loaded["run_id"] == "test_compile_001" - assert len(loaded["page_counts"]) == 3 - - -@pytest.mark.integration -class TestEncryptionToBundlingWorkflow: - """Integration tests for encryption and bundling workflows.""" - - def test_encryption_preserves_pdf_reference_data( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify encrypted PDFs preserve references needed by bundling. - - Real-world significance: - - Encryption step (Step 7) reads individual PDFs and encrypts - - Must preserve filename, client metadata for bundling - - Bundle step needs: sequence, client_id, school/board for grouping - """ - # Create mock encrypted PDF record - pdf_data = { - "sequence": "00001", - "client_id": "C00001", - "filename": "00001_C00001.pdf", - "client": { - "first_name": "Alice", - "last_name": "Zephyr", - "school": "Test Academy", - "board": "Test Board", - }, - "encrypted": True, - "password": "20150615", # DOB in YYYYMMDD format - } - - # Verify bundling can use this data - assert pdf_data["sequence"] - assert isinstance(pdf_data["client"], dict) - assert pdf_data["client"]["school"] # For group_by="school" - assert pdf_data["client"]["board"] # For group_by="board" - - def test_bundling_manifest_generation_from_pdfs(self, tmp_test_dir: Path) -> None: - """Verify bundling creates manifest of grouped PDFs. - - Real-world significance: - - Bundle step creates manifest mapping: bundle file → contained client PDFs - - Manifest allows recipients to know which students in each bundle - - Enables validation that no students lost in bundling - """ - bundle_manifest = { - "run_id": "test_bundle_001", - "language": "en", - "created_at": "2025-01-01T12:00:00Z", - "bundles": [ - { - "bundle_id": "bundle_001", - "bundle_file": "bundle_001.pdf", - "group_key": "Test_Academy", # school name - "client_count": 5, - "clients": [ - {"sequence": "00001", "client_id": "C00001"}, - {"sequence": "00002", "client_id": "C00002"}, - {"sequence": "00003", "client_id": "C00003"}, - {"sequence": "00004", "client_id": "C00004"}, - {"sequence": "00005", "client_id": "C00005"}, - ], - }, - ], - "total_bundles": 1, - "total_clients": 5, - } - - # Write manifest - metadata_dir = tmp_test_dir / "metadata" - metadata_dir.mkdir() - manifest_path = metadata_dir / "en_bundle_manifest_test_bundle_001.json" - - with open(manifest_path, "w") as f: - json.dump(bundle_manifest, f, indent=2) - - # Verify manifest structure - assert manifest_path.exists() - with open(manifest_path) as f: - loaded = json.load(f) - - assert loaded["total_clients"] == 5 - assert len(loaded["bundles"]) == 1 - assert loaded["bundles"][0]["client_count"] == 5 - - -@pytest.mark.integration -class TestConfigPropagationAcrossSteps: - """Integration tests for configuration changes affecting multi-step workflow.""" - - def test_qr_disabled_affects_notice_generation( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify notice generation respects qr.enabled=false configuration. - - Real-world significance: - - If QR generation is disabled (qr.enabled=false), Step 3 doesn't run - - Notice templates should handle missing QR references - - Notices should still generate without QR images - """ - config_no_qr = default_config.copy() - config_no_qr["qr"]["enabled"] = False - - # Notice generation with qr.enabled=false should: - # 1. Skip QR reference in template (if applicable) - # 2. Still generate notice content - # 3. Not fail on missing QR files - - assert config_no_qr["qr"]["enabled"] is False - - def test_encryption_disabled_enables_bundling( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify bundling is enabled only when encryption is disabled. - - Real-world significance: - - If encryption.enabled=true, bundling is skipped (Step 8 not run) - - If encryption.enabled=false, bundling can run - - Configuration enforces: encrypt OR bundle, not both - """ - config_encrypted = copy.deepcopy(default_config) - config_encrypted["encryption"]["enabled"] = True - - config_bundled = copy.deepcopy(default_config) - config_bundled["encryption"]["enabled"] = False - config_bundled["bundling"]["bundle_size"] = 50 - - # When encryption enabled, bundling should be skipped - assert config_encrypted["encryption"]["enabled"] is True - - # When encryption disabled, bundling can proceed - assert config_bundled["encryption"]["enabled"] is False - assert config_bundled["bundling"]["bundle_size"] > 0 - - def test_cleanup_configuration_affects_artifact_retention( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify cleanup step respects keep_intermediate_files configuration. - - Real-world significance: - - If keep_intermediate_files=true: retain .typ, JSON, per-client PDFs - - If keep_intermediate_files=false: delete intermediate files - - Affects disk space usage significantly for large runs - """ - config_keep = copy.deepcopy(default_config) - config_keep["pipeline"]["keep_intermediate_files"] = True - - config_clean = copy.deepcopy(default_config) - config_clean["pipeline"]["keep_intermediate_files"] = False - - # With keep_intermediate_files=true, files should be retained - assert config_keep["pipeline"]["keep_intermediate_files"] is True - - # With keep_intermediate_files=false, files should be deleted - assert config_clean["pipeline"]["keep_intermediate_files"] is False diff --git a/tests/integration/test_translation_integration.py b/tests/integration/test_translation_integration.py index e299447..f4e9dcc 100644 --- a/tests/integration/test_translation_integration.py +++ b/tests/integration/test_translation_integration.py @@ -16,7 +16,7 @@ import pytest -from pipeline import generate_notices, preprocess, translation_helpers +from pipeline import generate_notices, translation_helpers @pytest.mark.integration @@ -30,30 +30,6 @@ def translation_setup(self): yield translation_helpers.clear_caches() - def test_normalize_then_translate_polio_english( - self, translation_setup: None - ) -> None: - """Verify Poliomyelitis -> Polio -> Polio (English).""" - normalized = translation_helpers.normalize_disease("Poliomyelitis") - assert normalized == "Polio" - - translated = translation_helpers.display_label( - "diseases_overdue", normalized, "en" - ) - assert translated == "Polio" - - def test_normalize_then_translate_polio_french( - self, translation_setup: None - ) -> None: - """Verify Poliomyelitis -> Polio -> Poliomyélite (French).""" - normalized = translation_helpers.normalize_disease("Poliomyelitis") - assert normalized == "Polio" - - translated = translation_helpers.display_label( - "diseases_overdue", normalized, "fr" - ) - assert translated == "Poliomyélite" - def test_build_template_context_translates_vaccines_due( self, translation_setup: None ) -> None: @@ -198,30 +174,6 @@ def test_build_template_context_translates_received_vaccines( # check that the context contains the expected structure assert "received" in context - def test_disease_normalization_integration(self) -> None: - """Verify disease normalization works correctly in preprocessing. - - Confirms that the normalized output handles variant disease names using - the current translation resources. - """ - translation_helpers.clear_caches() - - # Test with variant input - should normalize correctly - result = preprocess.process_vaccines_due("Poliomyelitis, Measles", "en") - - # Should normalize Poliomyelitis to Polio (canonical form) - assert "Polio" in result - assert "Measles" in result - - def test_multiple_languages_independent(self, translation_setup: None) -> None: - """Verify translations for different languages are independent.""" - en_polio = translation_helpers.display_label("diseases_overdue", "Polio", "en") - fr_polio = translation_helpers.display_label("diseases_overdue", "Polio", "fr") - - assert en_polio != fr_polio - assert en_polio == "Polio" - assert fr_polio == "Poliomyélite" - def test_build_template_context_includes_formatted_date( self, translation_setup: None ) -> None: diff --git a/tests/unit/test_config_loader.py b/tests/unit/test_config_loader.py deleted file mode 100644 index 4f37b98..0000000 --- a/tests/unit/test_config_loader.py +++ /dev/null @@ -1,179 +0,0 @@ -"""Unit tests for config_loader module - YAML configuration loading and retrieval. - -Tests cover: -- Loading YAML configurations from files -- Error handling for missing files and invalid YAML -- Support for various data types (strings, integers, booleans, lists, nested dicts) -- Default values and fallback behavior - -Real-world significance: -- Configuration controls all pipeline behavior (QR generation, encryption, batching, etc.) -- Incorrect config loading can silently disable features or cause crashes -- Config validation ensures all required keys are present -""" - -from __future__ import annotations - -import tempfile -from pathlib import Path - -import pytest - -from pipeline import config_loader - - -@pytest.mark.unit -class TestLoadConfig: - """Unit tests for load_config function.""" - - def test_load_config_with_default_path(self) -> None: - """Verify config loads from default location. - - Real-world significance: - - Pipeline must load config automatically without user intervention - - Default path should point to config/parameters.yaml - """ - config = config_loader.load_config() - - assert isinstance(config, dict) - assert len(config) > 0 - - def test_load_config_with_custom_path(self) -> None: - """Verify config loads from custom path. - - Real-world significance: - - Users may provide config from different directories (e.g., per-district) - - Must support absolute and relative paths - """ - with tempfile.TemporaryDirectory() as tmpdir: - config_path = Path(tmpdir) / "test_config.yaml" - config_path.write_text("qr:\n enabled: false\ntest_key: test_value\n") - - config = config_loader.load_config(config_path) - - assert config["test_key"] == "test_value" - - def test_load_config_with_nested_yaml(self) -> None: - """Verify nested YAML structures load correctly. - - Real-world significance: - - Config sections (qr, encryption, pipeline, etc.) are nested - - Must preserve structure for dot-notation retrieval - """ - with tempfile.TemporaryDirectory() as tmpdir: - config_path = Path(tmpdir) / "nested_config.yaml" - config_path.write_text( - """qr: - enabled: false -section1: - key1: value1 - key2: value2 -section2: - nested: - deep_key: deep_value -""" - ) - - config = config_loader.load_config(config_path) - - assert config["section1"]["key1"] == "value1" - assert config["section2"]["nested"]["deep_key"] == "deep_value" - - def test_load_config_file_not_found(self) -> None: - """Verify error when config file missing. - - Real-world significance: - - Missing config indicates setup error; must fail early with clear message - """ - missing_path = Path("/nonexistent/path/config.yaml") - - with pytest.raises(FileNotFoundError): - config_loader.load_config(missing_path) - - def test_load_config_empty_file(self) -> None: - """Verify empty YAML file with valid QR config returns dict. - - Real-world significance: - - Empty config must still provide valid QR settings (QR enabled by default) - """ - with tempfile.TemporaryDirectory() as tmpdir: - config_path = Path(tmpdir) / "empty_config.yaml" - # Even empty files need valid QR config after validation - config_path.write_text("qr:\n enabled: false\n") - - config = config_loader.load_config(config_path) - - assert config.get("qr", {}).get("enabled") is False - - def test_load_config_with_various_data_types(self) -> None: - """Verify YAML correctly loads strings, numbers, booleans, lists, nulls. - - Real-world significance: - - Config uses all YAML types (e.g., qr.enabled: true, batch_size: 100) - - Type preservation is critical for correct behavior - """ - with tempfile.TemporaryDirectory() as tmpdir: - config_path = Path(tmpdir) / "types_config.yaml" - config_path.write_text( - """qr: - enabled: false -string_val: hello -int_val: 42 -float_val: 3.14 -bool_val: true -list_val: - - item1 - - item2 -null_val: null -""" - ) - - config = config_loader.load_config(config_path) - - assert config["string_val"] == "hello" - assert config["int_val"] == 42 - assert config["float_val"] == 3.14 - assert config["bool_val"] is True - assert config["list_val"] == ["item1", "item2"] - assert config["null_val"] is None - - def test_load_config_with_invalid_yaml(self) -> None: - """Verify error on invalid YAML syntax. - - Real-world significance: - - Malformed config will cause hard-to-debug failures downstream - - Must catch and report early - """ - with tempfile.TemporaryDirectory() as tmpdir: - config_path = Path(tmpdir) / "invalid_config.yaml" - config_path.write_text("key: value\n invalid: : :") - - with pytest.raises(Exception): # yaml.YAMLError or similar - config_loader.load_config(config_path) - - -@pytest.mark.unit -class TestActualConfig: - """Unit tests using the actual parameters.yaml (if present). - - Real-world significance: - - Should verify that production config is valid and loadable - - Catches config corruption or breaking changes - """ - - def test_actual_config_loads_successfully(self) -> None: - """Verify production config loads without error.""" - config = config_loader.load_config() - - assert isinstance(config, dict) - assert len(config) > 0 - - def test_actual_config_has_core_sections(self) -> None: - """Verify config has expected top-level sections.""" - config = config_loader.load_config() - - # At least some of these should exist - has_sections = any( - key in config for key in ["pipeline", "qr", "encryption", "bundling"] - ) - assert has_sections, "Config missing core sections" diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py index 6e7acb2..cc03b51 100644 --- a/tests/unit/test_config_validation.py +++ b/tests/unit/test_config_validation.py @@ -1,12 +1,15 @@ -"""Tests for configuration validation across pipeline steps. +"""Tests for configuration loading and validation across pipeline steps. -This module tests the validate_config() function which ensures that -required configuration keys are present and valid when config is loaded. +This module tests: +- YAML configuration loading from files +- Error handling for missing or malformed config files +- Validation ensures that required configuration keys are present and valid. Real-world significance: +- Configuration controls all pipeline behavior (QR generation, encryption, bundling, etc.) +- Incorrect config loading or validation can cause cryptic failures deep in the pipeline - Validates conditional requirements (e.g., qr.payload_template if qr.enabled=true) - Catches configuration errors early at load time with clear error messages -- Prevents cryptic failures deep in pipeline execution - Helps administrators debug configuration issues Note: Since validate_config() validates the entire config, test configs must have @@ -16,9 +19,13 @@ from __future__ import annotations +import tempfile +from pathlib import Path +from typing import Any, Dict + import pytest -from typing import Dict, Any +from pipeline import config_loader from pipeline.config_loader import validate_config @@ -28,6 +35,163 @@ } +@pytest.mark.unit +class TestLoadConfig: + """Unit tests for load_config function.""" + + def test_load_config_with_default_path(self) -> None: + """Verify config loads from default location. + + Real-world significance: + - Pipeline must load config automatically without user intervention + - Default path should point to config/parameters.yaml + """ + config = config_loader.load_config() + + assert isinstance(config, dict) + assert len(config) > 0 + + def test_load_config_with_custom_path(self) -> None: + """Verify config loads from custom path. + + Real-world significance: + - Users may provide config from different directories (e.g., per-district) + - Must support absolute and relative paths + """ + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "test_config.yaml" + config_path.write_text("qr:\n enabled: false\ntest_key: test_value\n") + + config = config_loader.load_config(config_path) + + assert config["test_key"] == "test_value" + + def test_load_config_with_nested_yaml(self) -> None: + """Verify nested YAML structures load correctly. + + Real-world significance: + - Config sections (qr, encryption, pipeline, etc.) are nested + - Must preserve structure for dot-notation retrieval + """ + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "nested_config.yaml" + config_path.write_text( + """qr: + enabled: false +section1: + key1: value1 + key2: value2 +section2: + nested: + deep_key: deep_value +""" + ) + + config = config_loader.load_config(config_path) + + assert config["section1"]["key1"] == "value1" + assert config["section2"]["nested"]["deep_key"] == "deep_value" + + def test_load_config_file_not_found(self) -> None: + """Verify error when config file missing. + + Real-world significance: + - Missing config indicates setup error; must fail early with clear message + """ + missing_path = Path("/nonexistent/path/config.yaml") + + with pytest.raises(FileNotFoundError): + config_loader.load_config(missing_path) + + def test_load_config_empty_file(self) -> None: + """Verify empty YAML file with valid QR config returns dict. + + Real-world significance: + - Empty config must still provide valid QR settings (QR enabled by default) + """ + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "empty_config.yaml" + # Even empty files need valid QR config after validation + config_path.write_text("qr:\n enabled: false\n") + + config = config_loader.load_config(config_path) + + assert config.get("qr", {}).get("enabled") is False + + def test_load_config_with_various_data_types(self) -> None: + """Verify YAML correctly loads strings, numbers, booleans, lists, nulls. + + Real-world significance: + - Config uses all YAML types (e.g., qr.enabled: true, batch_size: 100) + - Type preservation is critical for correct behavior + """ + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "types_config.yaml" + config_path.write_text( + """qr: + enabled: false +string_val: hello +int_val: 42 +float_val: 3.14 +bool_val: true +list_val: + - item1 + - item2 +null_val: null +""" + ) + + config = config_loader.load_config(config_path) + + assert config["string_val"] == "hello" + assert config["int_val"] == 42 + assert config["float_val"] == 3.14 + assert config["bool_val"] is True + assert config["list_val"] == ["item1", "item2"] + assert config["null_val"] is None + + def test_load_config_with_invalid_yaml(self) -> None: + """Verify error on invalid YAML syntax. + + Real-world significance: + - Malformed config will cause hard-to-debug failures downstream + - Must catch and report early + """ + with tempfile.TemporaryDirectory() as tmpdir: + config_path = Path(tmpdir) / "invalid_config.yaml" + config_path.write_text("key: value\n invalid: : :") + + with pytest.raises(Exception): # yaml.YAMLError or similar + config_loader.load_config(config_path) + + +@pytest.mark.unit +class TestActualConfig: + """Unit tests using the actual parameters.yaml (if present). + + Real-world significance: + - Should verify that production config is valid and loadable + - Catches config corruption or breaking changes + """ + + def test_actual_config_loads_successfully(self) -> None: + """Verify production config loads without error.""" + config = config_loader.load_config() + + assert isinstance(config, dict) + assert len(config) > 0 + + def test_actual_config_has_core_sections(self) -> None: + """Verify config has expected top-level sections.""" + config = config_loader.load_config() + + # At least some of these should exist + has_sections = any( + key in config for key in ["pipeline", "qr", "encryption", "bundling"] + ) + assert has_sections, "Config missing core sections" + + @pytest.mark.unit class TestQRConfigValidation: """Test configuration validation for QR Code Generation.""" diff --git a/tests/unit/test_data_models.py b/tests/unit/test_data_models.py index 049f609..cf562cb 100644 --- a/tests/unit/test_data_models.py +++ b/tests/unit/test_data_models.py @@ -1,15 +1,11 @@ """Unit tests for data_models module - core pipeline data structures. Tests cover: -- ClientRecord dataclass structure and serialization -- PreprocessResult aggregation -- ArtifactPayload metadata and schema -- PdfRecord for compiled notice tracking +- ClientRecord integration with Language validation logic Real-world significance: - These immutable dataclasses enforce consistent data structure across pipeline -- Type hints and frozen dataclasses prevent bugs from data corruption -- Schema must remain stable for artifacts to be shareable between pipeline runs +- Language field validation is critical for downstream localization """ from __future__ import annotations @@ -24,95 +20,6 @@ class TestClientRecord: """Unit tests for ClientRecord dataclass.""" - def test_client_record_creation(self) -> None: - """Verify ClientRecord can be created with all required fields. - - Real-world significance: - - ClientRecord is the core data structure for each student notice - """ - client = data_models.ClientRecord( - sequence="00001", - client_id="C00001", - language="en", - person={"first_name": "Alice", "last_name": "Zephyr", "over_16": False}, - school={"name": "Tunnel Academy"}, - board={"name": "Guelph Board"}, - contact={"street": "123 Main St"}, - vaccines_due="Measles/Mumps/Rubella", - vaccines_due_list=["Measles", "Mumps", "Rubella"], - received=[], - metadata={}, - ) - - assert client.sequence == "00001" - assert client.client_id == "C00001" - assert client.language == "en" - - def test_client_record_is_frozen(self) -> None: - """Verify ClientRecord is immutable (frozen). - - Real-world significance: - - Prevents accidental modification of client data after preprocessing - - Ensures data integrity through pipeline - """ - client = data_models.ClientRecord( - sequence="00001", - client_id="C00001", - language="en", - person={}, - school={}, - board={}, - contact={}, - vaccines_due=None, - vaccines_due_list=None, - received=None, - metadata={}, - ) - - with pytest.raises(Exception): # FrozenInstanceError or AttributeError - client.sequence = "00002" # type: ignore[misc] - - def test_client_record_optional_qr_field(self) -> None: - """Verify ClientRecord has optional qr field. - - Real-world significance: - - QR code added in Step 2, may be None before then - """ - client = data_models.ClientRecord( - sequence="00001", - client_id="C00001", - language="en", - person={}, - school={}, - board={}, - contact={}, - vaccines_due=None, - vaccines_due_list=None, - received=None, - metadata={}, - qr=None, - ) - - assert client.qr is None - - client_with_qr = data_models.ClientRecord( - sequence="00001", - client_id="C00001", - language="en", - person={}, - school={}, - board={}, - contact={}, - vaccines_due=None, - vaccines_due_list=None, - received=None, - metadata={}, - qr={"payload": "test_payload", "filename": "test.png"}, - ) - - assert client_with_qr.qr is not None - assert client_with_qr.qr["payload"] == "test_payload" - def test_client_record_language_must_be_valid_enum_value(self) -> None: """Verify ClientRecord language must be a valid Language enum value. @@ -190,120 +97,3 @@ def test_client_record_invalid_language_rejected_by_enum_validation( # Verify that attempting to validate this language raises error with pytest.raises(ValueError, match="Unsupported language: es"): Language.from_string(client_invalid.language) - - -@pytest.mark.unit -class TestPreprocessResult: - """Unit tests for PreprocessResult dataclass.""" - - def test_preprocess_result_creation(self) -> None: - """Verify PreprocessResult aggregates clients and warnings. - - Real-world significance: - - Output of Step 1 (Preprocess), input to Steps 2-3 - """ - clients = [ - data_models.ClientRecord( - sequence="00001", - client_id="C00001", - language="en", - person={}, - school={}, - board={}, - contact={}, - vaccines_due=None, - vaccines_due_list=None, - received=None, - metadata={}, - ) - ] - - result = data_models.PreprocessResult( - clients=clients, - warnings=["Warning 1"], - ) - - assert len(result.clients) == 1 - assert len(result.warnings) == 1 - - def test_preprocess_result_empty_warnings(self) -> None: - """Verify PreprocessResult works with no warnings. - - Real-world significance: - - Clean input should have empty warnings list - """ - result = data_models.PreprocessResult( - clients=[], - warnings=[], - ) - - assert result.warnings == [] - - -@pytest.mark.unit -class TestArtifactPayload: - """Unit tests for ArtifactPayload dataclass.""" - - def test_artifact_payload_creation(self) -> None: - """Verify ArtifactPayload stores metadata and clients. - - Real-world significance: - - Artifacts are JSON files with client data and metadata - - Must include run_id for comparing pipeline runs - """ - clients = [] - payload = data_models.ArtifactPayload( - run_id="test_run_001", - language="en", - clients=clients, - warnings=[], - created_at="2025-01-01T12:00:00Z", - input_file="test.xlsx", - total_clients=0, - ) - - assert payload.run_id == "test_run_001" - assert payload.language == "en" - assert payload.total_clients == 0 - - def test_artifact_payload_optional_input_file(self) -> None: - """Verify ArtifactPayload has optional input_file field. - - Real-world significance: - - Not all artifacts know their source file - """ - payload_with_file = data_models.ArtifactPayload( - run_id="test_run_001", - language="en", - clients=[], - warnings=[], - created_at="2025-01-01T12:00:00Z", - input_file="input.xlsx", - ) - - assert payload_with_file.input_file == "input.xlsx" - - -@pytest.mark.unit -class TestPdfRecord: - """Unit tests for PdfRecord dataclass.""" - - def test_pdf_record_creation(self, tmp_path) -> None: - """Verify PdfRecord tracks compiled PDF metadata. - - Real-world significance: - - Used in Step 6 (Count PDFs) to verify all notices compiled - """ - pdf_path = tmp_path / "00001_C00001.pdf" - - record = data_models.PdfRecord( - sequence="00001", - client_id="C00001", - pdf_path=pdf_path, - page_count=1, - client={"first_name": "Alice"}, - ) - - assert record.sequence == "00001" - assert record.client_id == "C00001" - assert record.page_count == 1 diff --git a/tests/unit/test_dynamic_template_loading.py b/tests/unit/test_dynamic_template_loading.py deleted file mode 100644 index f27f16a..0000000 --- a/tests/unit/test_dynamic_template_loading.py +++ /dev/null @@ -1,259 +0,0 @@ -"""Unit tests for dynamic template loading functions. - -Tests cover: -- load_template_module() function -- build_language_renderers() function -- Error handling for missing templates -- Error handling for invalid modules - -Real-world significance: -- Dynamic loading enables custom template directories -- Error messages must be clear and actionable -- Validation must catch configuration errors early -""" - -from __future__ import annotations - -from pathlib import Path - -import pytest - -from pipeline import generate_notices - - -@pytest.mark.unit -class TestLoadTemplateModule: - """Unit tests for load_template_module function.""" - - @pytest.fixture - def templates_dir(self) -> Path: - """Provide path to default templates directory.""" - return Path(__file__).parent.parent.parent / "templates" - - @pytest.fixture - def custom_templates_dir(self) -> Path: - """Provide path to custom templates directory.""" - return Path(__file__).parent.parent / "fixtures" / "custom_templates" - - def test_load_template_module_success_en_from_default( - self, templates_dir: Path - ) -> None: - """Verify English template module loads from default templates. - - Real-world significance: - - Dynamic loading must work for standard templates - - Module must have render_notice function - """ - module = generate_notices.load_template_module(templates_dir, "en") - - assert hasattr(module, "render_notice") - assert callable(module.render_notice) - - def test_load_template_module_success_fr_from_default( - self, templates_dir: Path - ) -> None: - """Verify French template module loads from default templates.""" - module = generate_notices.load_template_module(templates_dir, "fr") - - assert hasattr(module, "render_notice") - assert callable(module.render_notice) - - def test_load_template_module_success_from_custom( - self, custom_templates_dir: Path - ) -> None: - """Verify template module loads from custom directory. - - Real-world significance: - - Custom templates must be loadable dynamically - - Enables PHU-specific template customization - """ - if not custom_templates_dir.exists(): - pytest.skip("Custom templates directory not set up") - - module = generate_notices.load_template_module(custom_templates_dir, "en") - - assert hasattr(module, "render_notice") - assert callable(module.render_notice) - - def test_load_template_module_missing_file(self, tmp_path: Path) -> None: - """Verify error raised when template file doesn't exist. - - Real-world significance: - - User provides wrong template directory - - Should fail with clear error message - """ - with pytest.raises(FileNotFoundError, match="Template module not found"): - generate_notices.load_template_module(tmp_path, "en") - - def test_load_template_module_missing_file_error_mentions_path( - self, tmp_path: Path - ) -> None: - """Verify error message includes expected path. - - Real-world significance: - - User can see exactly what path was searched - - Helps troubleshoot configuration issues - """ - with pytest.raises(FileNotFoundError) as exc_info: - generate_notices.load_template_module(tmp_path, "en") - - error_msg = str(exc_info.value) - assert "en_template.py" in error_msg - assert str(tmp_path) in error_msg - - def test_load_template_module_missing_render_notice(self, tmp_path: Path) -> None: - """Verify error raised when module lacks render_notice(). - - Real-world significance: - - Template file exists but is invalid - - Should fail with clear message about missing function - """ - # Create invalid template file - invalid_template = tmp_path / "en_template.py" - invalid_template.write_text("# Empty template\n", encoding="utf-8") - - with pytest.raises(AttributeError, match="must define render_notice"): - generate_notices.load_template_module(tmp_path, "en") - - def test_load_template_module_missing_render_notice_mentions_file( - self, tmp_path: Path - ) -> None: - """Verify error message mentions template file path. - - Real-world significance: - - User knows which file has the problem - - Can look at file to see what's wrong - """ - invalid_template = tmp_path / "en_template.py" - invalid_template.write_text("# Empty template\n", encoding="utf-8") - - with pytest.raises(AttributeError) as exc_info: - generate_notices.load_template_module(tmp_path, "en") - - error_msg = str(exc_info.value) - assert str(invalid_template) in error_msg - - def test_load_template_module_syntax_error_in_template( - self, tmp_path: Path - ) -> None: - """Verify error when template has syntax errors. - - Real-world significance: - - Catches Python errors in template modules - - Fail-fast prevents confusing later errors - """ - invalid_template = tmp_path / "en_template.py" - invalid_template.write_text("this is not valid python }{", encoding="utf-8") - - with pytest.raises(Exception): # SyntaxError or similar - generate_notices.load_template_module(tmp_path, "en") - - -@pytest.mark.unit -class TestBuildLanguageRenderers: - """Unit tests for build_language_renderers function.""" - - @pytest.fixture - def templates_dir(self) -> Path: - """Provide path to default templates directory.""" - return Path(__file__).parent.parent.parent / "templates" - - @pytest.fixture - def custom_templates_dir(self) -> Path: - """Provide path to custom templates directory.""" - return Path(__file__).parent.parent / "fixtures" / "custom_templates" - - def test_build_language_renderers_success_from_default( - self, templates_dir: Path - ) -> None: - """Verify all language renderers built from default templates. - - Real-world significance: - - Must load all configured languages - - Renderer dict used throughout notice generation - """ - renderers = generate_notices.build_language_renderers(templates_dir) - - # Should have renderer for each language - assert "en" in renderers - assert "fr" in renderers - assert callable(renderers["en"]) - assert callable(renderers["fr"]) - - def test_build_language_renderers_success_from_custom( - self, custom_templates_dir: Path - ) -> None: - """Verify all language renderers built from custom templates.""" - if not custom_templates_dir.exists(): - pytest.skip("Custom templates directory not set up") - - renderers = generate_notices.build_language_renderers(custom_templates_dir) - - assert "en" in renderers - assert "fr" in renderers - assert callable(renderers["en"]) - assert callable(renderers["fr"]) - - def test_build_language_renderers_missing_language_allowed( - self, tmp_path: Path - ) -> None: - """Verify that missing language template is allowed (not required). - - Real-world significance: - - PHU templates can support single language (e.g., English only) - - Missing language just means that language isn't available - - Error only occurs when that specific language is requested - """ - # Create only English template - en_template = tmp_path / "en_template.py" - en_template.write_text( - "def render_notice(context, *, logo_path, signature_path): return ''", - encoding="utf-8", - ) - - # Should NOT raise - just returns only English - renderers = generate_notices.build_language_renderers(tmp_path) - - assert "en" in renderers - assert "fr" not in renderers - assert len(renderers) == 1 - - def test_build_language_renderers_returns_dict_with_correct_types( - self, templates_dir: Path - ) -> None: - """Verify return type is dict with callable values. - - Real-world significance: - - Type checking catches misconfigurations - - Callables can be used as template renderers - """ - renderers = generate_notices.build_language_renderers(templates_dir) - - assert isinstance(renderers, dict) - for lang_code, renderer in renderers.items(): - assert isinstance(lang_code, str) - assert callable(renderer) - - def test_build_language_renderers_multiple_calls_independent( - self, templates_dir: Path - ) -> None: - """Verify multiple calls create independent renderer instances. - - Real-world significance: - - Pipeline can create renderers multiple times - - Each call loads modules fresh (separate instances) - """ - renderers1 = generate_notices.build_language_renderers(templates_dir) - renderers2 = generate_notices.build_language_renderers(templates_dir) - - # Different dicts (not same object) - assert renderers1 is not renderers2 - - # Different function objects (fresh module loads each time) - # This is expected with dynamic loading - each call creates new module instances - assert renderers1["en"] is not renderers2["en"] - assert renderers1["fr"] is not renderers2["fr"] - - # But they should behave the same way (callable with same signature) - assert callable(renderers1["en"]) - assert callable(renderers2["en"]) diff --git a/tests/unit/test_enums.py b/tests/unit/test_enums.py deleted file mode 100644 index 55e7d90..0000000 --- a/tests/unit/test_enums.py +++ /dev/null @@ -1,323 +0,0 @@ -"""Unit tests for enums module - bundle strategy, language, and template field enumerations. - -Tests cover: -- BundleStrategy enum values and string conversion -- BundleType enum values and strategy mapping -- Language enum values and string conversion -- TemplateField enum values and field availability -- Error handling for invalid values -- Case-insensitive conversion -- Default behavior for None values - -Real-world significance: -- Bundle strategy determines how PDFs are grouped (by size, school, board) -- Language code determines template renderer and localization -- Template fields define available placeholders for QR codes and PDF passwords -- Invalid values would cause pipeline crashes or incorrect behavior -""" - -from __future__ import annotations - -import pytest - -from pipeline.enums import BundleStrategy, BundleType, Language, TemplateField - - -@pytest.mark.unit -class TestBundleStrategy: - """Unit tests for BundleStrategy enumeration.""" - - def test_enum_values_correct(self) -> None: - """Verify BundleStrategy has expected enum values. - - Real-world significance: - - Defines valid bundling strategies for pipeline - """ - assert BundleStrategy.SIZE.value == "size" - assert BundleStrategy.SCHOOL.value == "school" - assert BundleStrategy.BOARD.value == "board" - - def test_from_string_valid_lowercase(self) -> None: - """Verify from_string works with lowercase input. - - Real-world significance: - - Config values are often lowercase in YAML - """ - assert BundleStrategy.from_string("size") == BundleStrategy.SIZE - assert BundleStrategy.from_string("school") == BundleStrategy.SCHOOL - assert BundleStrategy.from_string("board") == BundleStrategy.BOARD - - def test_from_string_valid_uppercase(self) -> None: - """Verify from_string is case-insensitive for uppercase. - - Real-world significance: - - Users might input "SIZE" or "BOARD" in config - """ - assert BundleStrategy.from_string("SIZE") == BundleStrategy.SIZE - assert BundleStrategy.from_string("SCHOOL") == BundleStrategy.SCHOOL - assert BundleStrategy.from_string("BOARD") == BundleStrategy.BOARD - - def test_from_string_valid_mixed_case(self) -> None: - """Verify from_string is case-insensitive for mixed case. - - Real-world significance: - - Should accept any case variation - """ - assert BundleStrategy.from_string("Size") == BundleStrategy.SIZE - assert BundleStrategy.from_string("School") == BundleStrategy.SCHOOL - assert BundleStrategy.from_string("BoArD") == BundleStrategy.BOARD - - def test_from_string_none_defaults_to_size(self) -> None: - """Verify None defaults to SIZE strategy. - - Real-world significance: - - Missing bundling config should use safe default (SIZE) - """ - assert BundleStrategy.from_string(None) == BundleStrategy.SIZE - - def test_from_string_invalid_value_raises_error(self) -> None: - """Verify ValueError for invalid strategy string. - - Real-world significance: - - User error (typo in config) must be caught and reported clearly - """ - with pytest.raises(ValueError, match="Unknown bundle strategy: invalid"): - BundleStrategy.from_string("invalid") - - def test_from_string_invalid_error_includes_valid_options(self) -> None: - """Verify error message includes list of valid options. - - Real-world significance: - - Users need to know what values are valid when they make a mistake - """ - with pytest.raises(ValueError) as exc_info: - BundleStrategy.from_string("bad") - - error_msg = str(exc_info.value) - assert "size" in error_msg - assert "school" in error_msg - assert "board" in error_msg - - -@pytest.mark.unit -class TestBundleType: - """Unit tests for BundleType enumeration.""" - - def test_enum_values_correct(self) -> None: - """Verify BundleType has expected enum values. - - Real-world significance: - - Type descriptors used for bundle metadata and reporting - """ - assert BundleType.SIZE_BASED.value == "size_based" - assert BundleType.SCHOOL_GROUPED.value == "school_grouped" - assert BundleType.BOARD_GROUPED.value == "board_grouped" - - -@pytest.mark.unit -class TestStrategyTypeIntegration: - """Integration tests between BundleStrategy and BundleType.""" - - def test_all_strategies_round_trip(self) -> None: - """Verify strategies convert to/from string consistently. - - Real-world significance: - - Required for config persistence and reproducibility - """ - for strategy in BundleStrategy: - string_value = strategy.value - reconstructed = BundleStrategy.from_string(string_value) - assert reconstructed == strategy - - -@pytest.mark.unit -class TestLanguage: - """Unit tests for Language enumeration.""" - - def test_enum_values_correct(self) -> None: - """Verify Language enum has correct values. - - Real-world significance: - - Defines supported output languages for immunization notices - """ - assert Language.ENGLISH.value == "en" - assert Language.FRENCH.value == "fr" - - def test_language_from_string_english(self) -> None: - """Verify from_string('en') returns ENGLISH. - - Real-world significance: - - CLI and config often pass language as lowercase strings - """ - assert Language.from_string("en") == Language.ENGLISH - - def test_language_from_string_french(self) -> None: - """Verify from_string('fr') returns FRENCH. - - Real-world significance: - - CLI and config often pass language as lowercase strings - """ - assert Language.from_string("fr") == Language.FRENCH - - def test_language_from_string_case_insensitive_english(self) -> None: - """Verify from_string() is case-insensitive for English. - - Real-world significance: - - Users might input 'EN', 'En', etc.; should accept any case - """ - assert Language.from_string("EN") == Language.ENGLISH - assert Language.from_string("En") == Language.ENGLISH - - def test_language_from_string_case_insensitive_french(self) -> None: - """Verify from_string() is case-insensitive for French. - - Real-world significance: - - Users might input 'FR', 'Fr', etc.; should accept any case - """ - assert Language.from_string("FR") == Language.FRENCH - assert Language.from_string("Fr") == Language.FRENCH - - def test_language_from_string_none_defaults_to_english(self) -> None: - """Verify from_string(None) defaults to ENGLISH. - - Real-world significance: - - Allows safe default language when none specified in config - """ - assert Language.from_string(None) == Language.ENGLISH - - def test_language_from_string_invalid_raises_error(self) -> None: - """Verify from_string() raises ValueError for unsupported language. - - Real-world significance: - - User error (typo in config or CLI) must be caught and reported clearly - """ - with pytest.raises(ValueError, match="Unsupported language: es"): - Language.from_string("es") - - def test_language_from_string_error_includes_valid_options(self) -> None: - """Verify error message includes list of valid language options. - - Real-world significance: - - Users need to know what language codes are valid when they make a mistake - """ - with pytest.raises(ValueError) as exc_info: - Language.from_string("xyz") - - error_msg = str(exc_info.value) - assert "Valid options:" in error_msg - assert "en" in error_msg - assert "fr" in error_msg - - def test_language_all_codes(self) -> None: - """Verify all_codes() returns set of all language codes. - - Real-world significance: - - CLI argument parser and config validation use this to determine - allowed language choices - """ - assert Language.all_codes() == {"en", "fr"} - - def test_language_all_codes_returns_set(self) -> None: - """Verify all_codes() returns a set (not list or tuple). - - Real-world significance: - - argparse.choices expects a container; set is optimal for O(1) lookups - """ - codes = Language.all_codes() - assert isinstance(codes, set) - assert len(codes) == 2 - - def test_language_from_string_round_trip(self) -> None: - """Verify languages convert to/from string consistently. - - Real-world significance: - - Required for config persistence and reproducibility - """ - for lang in Language: - string_value = lang.value - reconstructed = Language.from_string(string_value) - assert reconstructed == lang - - -@pytest.mark.unit -class TestTemplateField: - """Unit tests for TemplateField enumeration.""" - - def test_enum_values_correct(self) -> None: - """Verify TemplateField has expected enum values. - - Real-world significance: - - Defines available placeholders for template rendering in QR codes - and PDF password generation - """ - assert TemplateField.CLIENT_ID.value == "client_id" - assert TemplateField.FIRST_NAME.value == "first_name" - assert TemplateField.LAST_NAME.value == "last_name" - assert TemplateField.NAME.value == "name" - assert TemplateField.DATE_OF_BIRTH.value == "date_of_birth" - assert TemplateField.DATE_OF_BIRTH_ISO.value == "date_of_birth_iso" - assert ( - TemplateField.DATE_OF_BIRTH_ISO_COMPACT.value == "date_of_birth_iso_compact" - ) - assert TemplateField.SCHOOL.value == "school" - assert TemplateField.BOARD.value == "board" - assert TemplateField.STREET_ADDRESS.value == "street_address" - assert TemplateField.CITY.value == "city" - assert TemplateField.PROVINCE.value == "province" - assert TemplateField.POSTAL_CODE.value == "postal_code" - assert TemplateField.LANGUAGE_CODE.value == "language_code" - - def test_template_field_enum_has_all_fields(self) -> None: - """Verify TemplateField enum contains all expected fields. - - Real-world significance: - - Ensures all client context fields are available for templating - - Any missing field would cause template validation errors - """ - expected = { - "client_id", - "first_name", - "last_name", - "name", - "date_of_birth", - "date_of_birth_iso", - "date_of_birth_iso_compact", - "school", - "board", - "street_address", - "city", - "province", - "postal_code", - "language_code", - } - assert TemplateField.all_values() == expected - - def test_template_field_all_values_returns_set(self) -> None: - """Verify all_values() returns a set for use with set operations. - - Real-world significance: - - Set operations needed for validation (set difference to find disallowed fields) - """ - values = TemplateField.all_values() - assert isinstance(values, set) - assert len(values) == 14 - - def test_template_field_count_matches_enum(self) -> None: - """Verify number of fields matches enum member count. - - Real-world significance: - - Prevents accidental field additions being missed in all_values() - """ - enum_members = [f for f in TemplateField] - all_values = TemplateField.all_values() - assert len(enum_members) == len(all_values) - - def test_template_field_includes_board(self) -> None: - """Verify TemplateField includes 'board' field (was missing from old QR whitelist). - - Real-world significance: - - board field is generated by build_client_context() but was not - included in SUPPORTED_QR_TEMPLATE_FIELDS, causing inconsistency - """ - assert "board" in TemplateField.all_values() - assert TemplateField.BOARD.value == "board" diff --git a/tests/unit/test_generate_notices.py b/tests/unit/test_generate_notices.py index c29d4c4..ba8f2d4 100644 --- a/tests/unit/test_generate_notices.py +++ b/tests/unit/test_generate_notices.py @@ -17,6 +17,7 @@ from __future__ import annotations import json +from dataclasses import replace from pathlib import Path import pytest @@ -383,7 +384,6 @@ def test_build_template_context_includes_qr_url_when_present(self) -> None: # Create client with qr.payload already set (as would come from artifact) client_base = sample_input.create_test_client_record() # Use dataclass replace to create new instance with qr dict - from dataclasses import replace client = replace( client_base, @@ -421,6 +421,244 @@ def test_build_template_context_omits_qr_url_when_absent(self) -> None: assert "qr_url:" not in client_data_str +@pytest.mark.unit +class TestLoadTemplateModule: + """Unit tests for load_template_module function.""" + + @pytest.fixture + def templates_dir(self) -> Path: + """Provide path to default templates directory.""" + return Path(__file__).parent.parent.parent / "templates" + + @pytest.fixture + def custom_templates_dir(self) -> Path: + """Provide path to custom templates directory.""" + return Path(__file__).parent.parent / "fixtures" / "custom_templates" + + def test_load_template_module_success_en_from_default( + self, templates_dir: Path + ) -> None: + """Verify English template module loads from default templates. + + Real-world significance: + - Dynamic loading must work for standard templates + - Module must have render_notice function + """ + module = generate_notices.load_template_module(templates_dir, "en") + + assert hasattr(module, "render_notice") + assert callable(module.render_notice) + + def test_load_template_module_success_fr_from_default( + self, templates_dir: Path + ) -> None: + """Verify French template module loads from default templates.""" + module = generate_notices.load_template_module(templates_dir, "fr") + + assert hasattr(module, "render_notice") + assert callable(module.render_notice) + + def test_load_template_module_success_from_custom( + self, custom_templates_dir: Path + ) -> None: + """Verify template module loads from custom directory. + + Real-world significance: + - Custom templates must be loadable dynamically + - Enables PHU-specific template customization + """ + if not custom_templates_dir.exists(): + pytest.skip("Custom templates directory not set up") + + module = generate_notices.load_template_module(custom_templates_dir, "en") + + assert hasattr(module, "render_notice") + assert callable(module.render_notice) + + def test_load_template_module_missing_file(self, tmp_path: Path) -> None: + """Verify error raised when template file doesn't exist. + + Real-world significance: + - User provides wrong template directory + - Should fail with clear error message + """ + with pytest.raises(FileNotFoundError, match="Template module not found"): + generate_notices.load_template_module(tmp_path, "en") + + def test_load_template_module_missing_file_error_mentions_path( + self, tmp_path: Path + ) -> None: + """Verify error message includes expected path. + + Real-world significance: + - User can see exactly what path was searched + - Helps troubleshoot configuration issues + """ + with pytest.raises(FileNotFoundError) as exc_info: + generate_notices.load_template_module(tmp_path, "en") + + error_msg = str(exc_info.value) + assert "en_template.py" in error_msg + assert str(tmp_path) in error_msg + + def test_load_template_module_missing_render_notice(self, tmp_path: Path) -> None: + """Verify error raised when module lacks render_notice(). + + Real-world significance: + - Template file exists but is invalid + - Should fail with clear message about missing function + """ + # Create invalid template file + invalid_template = tmp_path / "en_template.py" + invalid_template.write_text("# Empty template\n", encoding="utf-8") + + with pytest.raises(AttributeError, match="must define render_notice"): + generate_notices.load_template_module(tmp_path, "en") + + def test_load_template_module_missing_render_notice_mentions_file( + self, tmp_path: Path + ) -> None: + """Verify error message mentions template file path. + + Real-world significance: + - User knows which file has the problem + - Can look at file to see what's wrong + """ + invalid_template = tmp_path / "en_template.py" + invalid_template.write_text("# Empty template\n", encoding="utf-8") + + with pytest.raises(AttributeError) as exc_info: + generate_notices.load_template_module(tmp_path, "en") + + error_msg = str(exc_info.value) + assert str(invalid_template) in error_msg + + def test_load_template_module_syntax_error_in_template( + self, tmp_path: Path + ) -> None: + """Verify error when template has syntax errors. + + Real-world significance: + - Catches Python errors in template modules + - Fail-fast prevents confusing later errors + """ + invalid_template = tmp_path / "en_template.py" + invalid_template.write_text("this is not valid python }{", encoding="utf-8") + + with pytest.raises(Exception): # SyntaxError or similar + generate_notices.load_template_module(tmp_path, "en") + + +@pytest.mark.unit +class TestBuildLanguageRenderers: + """Unit tests for build_language_renderers function.""" + + @pytest.fixture + def templates_dir(self) -> Path: + """Provide path to default templates directory.""" + return Path(__file__).parent.parent.parent / "templates" + + @pytest.fixture + def custom_templates_dir(self) -> Path: + """Provide path to custom templates directory.""" + return Path(__file__).parent.parent / "fixtures" / "custom_templates" + + def test_build_language_renderers_success_from_default( + self, templates_dir: Path + ) -> None: + """Verify all language renderers built from default templates. + + Real-world significance: + - Must load all configured languages + - Renderer dict used throughout notice generation + """ + renderers = generate_notices.build_language_renderers(templates_dir) + + # Should have renderer for each language + assert "en" in renderers + assert "fr" in renderers + assert callable(renderers["en"]) + assert callable(renderers["fr"]) + + def test_build_language_renderers_success_from_custom( + self, custom_templates_dir: Path + ) -> None: + """Verify all language renderers built from custom templates.""" + if not custom_templates_dir.exists(): + pytest.skip("Custom templates directory not set up") + + renderers = generate_notices.build_language_renderers(custom_templates_dir) + + assert "en" in renderers + assert "fr" in renderers + assert callable(renderers["en"]) + assert callable(renderers["fr"]) + + def test_build_language_renderers_missing_language_allowed( + self, tmp_path: Path + ) -> None: + """Verify that missing language template is allowed (not required). + + Real-world significance: + - PHU templates can support single language (e.g., English only) + - Missing language just means that language isn't available + - Error only occurs when that specific language is requested + """ + # Create only English template + en_template = tmp_path / "en_template.py" + en_template.write_text( + "def render_notice(context, *, logo_path, signature_path): return ''", + encoding="utf-8", + ) + + # Should NOT raise - just returns only English + renderers = generate_notices.build_language_renderers(tmp_path) + + assert "en" in renderers + assert "fr" not in renderers + assert len(renderers) == 1 + + def test_build_language_renderers_returns_dict_with_correct_types( + self, templates_dir: Path + ) -> None: + """Verify return type is dict with callable values. + + Real-world significance: + - Type checking catches misconfigurations + - Callables can be used as template renderers + """ + renderers = generate_notices.build_language_renderers(templates_dir) + + assert isinstance(renderers, dict) + for lang_code, renderer in renderers.items(): + assert isinstance(lang_code, str) + assert callable(renderer) + + def test_build_language_renderers_multiple_calls_independent( + self, templates_dir: Path + ) -> None: + """Verify multiple calls create independent renderer instances. + + Real-world significance: + - Pipeline can create renderers multiple times + - Each call loads modules fresh (separate instances) + """ + renderers1 = generate_notices.build_language_renderers(templates_dir) + renderers2 = generate_notices.build_language_renderers(templates_dir) + + # Different dicts (not same object) + assert renderers1 is not renderers2 + + # Different function objects (fresh module loads each time) + # This is expected with dynamic loading - each call creates new module instances + assert renderers1["en"] is not renderers2["en"] + assert renderers1["fr"] is not renderers2["fr"] + + # But they should behave the same way (callable with same signature) + assert callable(renderers1["en"]) + assert callable(renderers2["en"]) + + @pytest.mark.unit class TestLanguageSupport: """Unit tests for language-specific functionality.""" @@ -433,8 +671,6 @@ def test_language_renderers_configured(self) -> None: - Both language renderers must be present """ # Build renderers from default template directory - from pathlib import Path - templates_dir = Path(__file__).parent.parent.parent / "templates" renderers = generate_notices.build_language_renderers(templates_dir) @@ -455,8 +691,6 @@ def test_render_notice_english_client(self, tmp_test_dir: Path) -> None: - Must render without errors """ # Build renderers from default template directory - from pathlib import Path - templates_dir = Path(__file__).parent.parent.parent / "templates" renderers = generate_notices.build_language_renderers(templates_dir) @@ -475,8 +709,6 @@ def test_render_notice_french_client(self, tmp_test_dir: Path) -> None: - Must render without errors for fr language code """ # Build renderers from default template directory - from pathlib import Path - templates_dir = Path(__file__).parent.parent.parent / "templates" renderers = generate_notices.build_language_renderers(templates_dir) diff --git a/tests/unit/test_preprocess.py b/tests/unit/test_preprocess.py index 471ff23..7a3316d 100644 --- a/tests/unit/test_preprocess.py +++ b/tests/unit/test_preprocess.py @@ -112,6 +112,7 @@ def test_handles_non_alphabetic_characters(self): """Verify that non-letter characters are preserved.""" assert preprocess.normalize("123 Name!") == "123 name!" + @pytest.mark.unit class TestFilterColumns: """Unit tests for filter_columns() column filtering utility.""" @@ -164,7 +165,10 @@ def test_order_of_columns_is_preserved(self): required = ["dob", "child_first_name"] result = preprocess.filter_columns(df, required) - assert list(result.columns) == ["child_first_name", "dob"] or list(result.columns) == required + assert ( + list(result.columns) == ["child_first_name", "dob"] + or list(result.columns) == required + ) # Either column order can appear depending on implementation; both are acceptable def test_ignores_required_columns_not_in_df(self): @@ -176,6 +180,7 @@ def test_ignores_required_columns_not_in_df(self): assert "child_first_name" in result.columns assert "missing_column" not in result.columns + @pytest.mark.unit class TestReadInput: """Unit tests for read_input function.""" @@ -785,5 +790,21 @@ def test_build_result_no_warning_for_unique_client_ids( # Should have NO warnings about duplicates duplicate_warnings = [w for w in result.warnings if "Duplicate client ID" in w] assert len(duplicate_warnings) == 0 - + +@pytest.mark.unit +class TestVaccineProcessingDue: + """Unit tests for process_vaccines_due function.""" + + def test_process_vaccines_due_normalization(self) -> None: + """Verify process_vaccines_due normalizes and formats disease names.""" + from pipeline import translation_helpers + + translation_helpers.clear_caches() + + # Test with variant input - should normalize correctly + result = preprocess.process_vaccines_due("Poliomyelitis, Measles", "en") + + # Should normalize Poliomyelitis to Polio (canonical form) + assert "Polio" in result + assert "Measles" in result diff --git a/tests/unit/test_run_pipeline.py b/tests/unit/test_run_pipeline.py index dff22a8..6c6ed3e 100644 --- a/tests/unit/test_run_pipeline.py +++ b/tests/unit/test_run_pipeline.py @@ -24,7 +24,8 @@ import pytest -from pipeline import orchestrator +from pipeline import generate_notices, orchestrator +from pipeline.enums import Language @pytest.mark.unit @@ -208,7 +209,9 @@ def test_run_step_1_prepare_output_user_cancels( ) assert result is False - def test_run_step_2_preprocess(self, tmp_test_dir: Path, tmp_output_structure: dict) -> None: + def test_run_step_2_preprocess( + self, tmp_test_dir: Path, tmp_output_structure: dict + ) -> None: """Verify Step 2: preprocess returns client count.""" mock_df = MagicMock() mock_mapped_df = MagicMock() @@ -220,16 +223,35 @@ def test_run_step_2_preprocess(self, tmp_test_dir: Path, tmp_output_structure: d mock_result.clients = [client1, client2] mock_result.warnings = [] - with patch("pipeline.orchestrator.preprocess.read_input", return_value=mock_df), \ - patch("pipeline.orchestrator.preprocess.map_columns", return_value=(mock_mapped_df, {})), \ - patch("pipeline.orchestrator.preprocess.filter_columns", return_value=mock_filtered_df), \ - patch("pipeline.orchestrator.preprocess.ensure_required_columns", return_value=mock_final_df), \ - patch("pipeline.orchestrator.preprocess.build_preprocess_result", return_value=mock_result), \ - patch("pipeline.orchestrator.preprocess.configure_logging", return_value=tmp_test_dir / "log.txt"), \ - patch("pipeline.orchestrator.preprocess.write_artifact", return_value="artifact.json"), \ - patch("pipeline.orchestrator.json.loads", return_value={"vaccine": "MMR"}), \ - patch("builtins.print"): - + with ( + patch("pipeline.orchestrator.preprocess.read_input", return_value=mock_df), + patch( + "pipeline.orchestrator.preprocess.map_columns", + return_value=(mock_mapped_df, {}), + ), + patch( + "pipeline.orchestrator.preprocess.filter_columns", + return_value=mock_filtered_df, + ), + patch( + "pipeline.orchestrator.preprocess.ensure_required_columns", + return_value=mock_final_df, + ), + patch( + "pipeline.orchestrator.preprocess.build_preprocess_result", + return_value=mock_result, + ), + patch( + "pipeline.orchestrator.preprocess.configure_logging", + return_value=tmp_test_dir / "log.txt", + ), + patch( + "pipeline.orchestrator.preprocess.write_artifact", + return_value="artifact.json", + ), + patch("pipeline.orchestrator.json.loads", return_value={"vaccine": "MMR"}), + patch("builtins.print"), + ): total = orchestrator.run_step_2_preprocess( input_dir=tmp_test_dir, input_file="test.xlsx", @@ -364,3 +386,245 @@ def test_pipeline_catches_compilation_errors(self) -> None: """ # Error handling tested at integration level assert True # Placeholder + + +@pytest.mark.unit +class TestUnsupportedLanguageDetection: + """Tests for early detection of unsupported language codes.""" + + def test_language_enum_validation_catches_invalid_code(self) -> None: + """Verify Language.from_string() catches invalid codes immediately. + + FAILURE POINT #1: Enum Validation + - Earliest point in the pipeline where language codes are validated + - Used by CLI, configuration loading, and preprocessing + - Provides clear error message listing valid options + + Real-world significance: + - Prevents silent failures downstream + - Users see immediately what languages are supported + - Clear error message guides users to fix their input + """ + # Invalid language code + with pytest.raises(ValueError) as exc_info: + Language.from_string("es") + + error_msg = str(exc_info.value) + assert "Unsupported language: es" in error_msg + assert "Valid options:" in error_msg + assert "en" in error_msg + assert "fr" in error_msg + + def test_language_enum_validation_error_message_format(self) -> None: + """Verify error message is informative and actionable. + + Real-world significance: + - Users can immediately see what went wrong + - Error message lists all valid options + - Helps administrators troubleshoot configuration issues + """ + invalid_codes = ["es", "pt", "de", "xyz", "invalid"] + + for invalid_code in invalid_codes: + with pytest.raises(ValueError) as exc_info: + Language.from_string(invalid_code) + + error_msg = str(exc_info.value) + # Error should be specific about which code is invalid + assert f"Unsupported language: {invalid_code}" in error_msg + # Error should list all valid options + assert "Valid options:" in error_msg + + def test_language_enum_validation_case_insensitive_accepts_mixed_case( + self, + ) -> None: + """Verify case-insensitive handling prevents user errors. + + Real-world significance: + - Users won't face errors for minor case variations + - "EN", "En", "eN" all work correctly + """ + # All case variations should work + assert Language.from_string("EN") == Language.ENGLISH + assert Language.from_string("En") == Language.ENGLISH + assert Language.from_string("FR") == Language.FRENCH + assert Language.from_string("Fr") == Language.FRENCH + + def test_language_from_string_none_defaults_to_english(self) -> None: + """Verify None defaults to English (safe default). + + Real-world significance: + - Prevents KeyError if language is somehow omitted + - Provides reasonable default behavior + """ + assert Language.from_string(None) == Language.ENGLISH + + def test_template_renderer_dispatch_assumes_valid_language(self) -> None: + """Verify get_language_renderer() assumes language is already validated. + + CHANGE RATIONALE (Task 4 - Remove Redundant Validation): + - Language validation happens at THREE upstream points: + 1. CLI: argparse choices (before pipeline runs) + 2. Enum: Language.from_string() validates at multiple usage points + 3. Type system: Type hints enforce Language enum + - get_language_renderer() can safely assume valid input (no defensive check needed) + - Removing redundant check simplifies code and improves performance + + Real-world significance: + - Code is clearer: no misleading defensive checks + - No false sense of protection; real validation is upstream + - If invalid language somehow reaches this point, KeyError is appropriate + (indicates upstream validation failure, not a data issue) + + Validation Contract: + - Input: Language enum (already validated upstream) + - Output: Callable template renderer + - No error handling needed (error indicates upstream validation failed) + """ + # Build renderers from default template directory + from pathlib import Path + + templates_dir = Path(__file__).parent.parent.parent / "templates" + renderers = generate_notices.build_language_renderers(templates_dir) + + # Verify renderer dispatch works for valid languages + en = Language.from_string("en") + en_renderer = generate_notices.get_language_renderer(en, renderers) + assert callable(en_renderer) + + fr = Language.from_string("fr") + fr_renderer = generate_notices.get_language_renderer(fr, renderers) + assert callable(fr_renderer) + + def test_valid_languages_pass_all_checks(self) -> None: + """Verify valid languages pass all validation checks. + + Real-world significance: + - Confirms that supported languages work end-to-end + - Positive test case for all failure points + """ + # Build renderers from default template directory + from pathlib import Path + + templates_dir = Path(__file__).parent.parent.parent / "templates" + renderers = generate_notices.build_language_renderers(templates_dir) + + # English + en_lang = Language.from_string("en") + assert en_lang == Language.ENGLISH + en_renderer = generate_notices.get_language_renderer(en_lang, renderers) + assert callable(en_renderer) + + # French + fr_lang = Language.from_string("fr") + assert fr_lang == Language.FRENCH + fr_renderer = generate_notices.get_language_renderer(fr_lang, renderers) + assert callable(fr_renderer) + + def test_language_all_codes_returns_supported_languages(self) -> None: + """Verify Language.all_codes() returns set of all supported languages. + + Real-world significance: + - Used by CLI for dynamic argument validation + - Ensures CLI choices update automatically when languages are added + """ + codes = Language.all_codes() + assert isinstance(codes, set) + assert "en" in codes + assert "fr" in codes + assert len(codes) == 2 + + +@pytest.mark.unit +class TestLanguageFailurePathDocumentation: + """Document the exact failure points and error messages for unsupported languages.""" + + def test_failure_path_unsupported_language_documentation(self) -> None: + """Document where unsupported languages fail in the pipeline. + + This test serves as documentation of the failure detection strategy. + + FAILURE POINT SEQUENCE: + ======================= + + 1. **CLI Entry Point (FIRST DEFENSE - ARGPARSE)** + Location: pipeline/orchestrator.py, parse_args() + Trigger: User runs `viper input.xlsx es` + Error Message: "argument language: invalid choice: 'es' (choose from en, fr)" + Resolution: User sees valid choices immediately + + 2. **Enum Validation (PRIMARY VALIDATION)** + Location: pipeline/enums.py, Language.from_string() + Trigger: Any code path tries Language.from_string("es") + Error Message: "ValueError: Unsupported language: es. Valid options: en, fr" + Used By: + - Preprocessing: convert_date_string(), line ~178-201 + - Preprocessing: build_result(), line ~675 + - Generate notices: render_notice(), line ~249 + - Testing: Language validation tests + + 3. **Template Dispatcher (NO DEFENSIVE CHECK - Task 4 OPTIMIZATION)** + Location: pipeline/generate_notices.py, get_language_renderer() + Status: REMOVED redundant validation check in Task 4 + Rationale: Language is guaranteed valid by CLI validation + Language.from_string() + Performance: Eliminates unnecessary dict lookup validation + Safety: Type system and upstream validation provide sufficient protection + + 4. **Rendering Failure (SHOULD NOT REACH)** + Location: pipeline/generate_notices.py, render_notice() + Would Occur: If invalid language somehow bypassed both checks + Error Type: Would be KeyError from _LANGUAGE_RENDERERS[language.value] + Prevention: Checks 1-2 ensure this never happens + + RESULT: **IMMEDIATE FAILURE WITH CLEAR ERROR MESSAGE** + - User sees error at CLI before pipeline starts + - If CLI validation bypassed, fails in enum validation with clear message + - All failure points provide actionable error messages listing valid options + - **ZERO RISK** of silent failures or cryptic KeyError + + ADDING A NEW LANGUAGE: + ===================== + If a new language needs to be added (e.g., Spanish): + + 1. Add to enum: + class Language(Enum): + ENGLISH = "en" + FRENCH = "fr" + SPANISH = "es" # Add here + + 2. CLI automatically updated (uses Language.all_codes()) + + 3. Enum validation automatically updated (iterates Language members) + + 4. Create template: templates/es_template.py with render_notice() + + 5. Register renderer: + _LANGUAGE_RENDERERS = { + Language.ENGLISH.value: render_notice_en, + Language.FRENCH.value: render_notice_fr, + Language.SPANISH.value: render_notice_es, # Add here + } + + 6. Add Spanish vaccine/disease mappings to config files + + 7. Tests automatically include new language (generic test patterns) + + Result: **THREE-LINE CHANGE** in code + config updates + """ + # Build renderers from default template directory + from pathlib import Path + + templates_dir = Path(__file__).parent.parent.parent / "templates" + renderers = generate_notices.build_language_renderers(templates_dir) + + # This test is primarily documentation; verify current state + assert Language.all_codes() == {"en", "fr"} + + # Verify enum validation works as documented + with pytest.raises(ValueError, match="Unsupported language: es"): + Language.from_string("es") + + # Verify renderer dispatch works as documented + en = Language.from_string("en") + en_renderer = generate_notices.get_language_renderer(en, renderers) + assert callable(en_renderer) diff --git a/tests/unit/test_translation_helpers.py b/tests/unit/test_translation_helpers.py index 05dcf42..f3b0c2f 100644 --- a/tests/unit/test_translation_helpers.py +++ b/tests/unit/test_translation_helpers.py @@ -327,6 +327,38 @@ def test_all_canonical_diseases_have_french_labels(self) -> None: # These should have accented French versions pass + def test_normalize_then_translate_polio_english(self) -> None: + """Verify Poliomyelitis -> Polio -> Polio (English).""" + translation_helpers.clear_caches() + normalized = translation_helpers.normalize_disease("Poliomyelitis") + assert normalized == "Polio" + + translated = translation_helpers.display_label( + "diseases_overdue", normalized, "en" + ) + assert translated == "Polio" + + def test_normalize_then_translate_polio_french(self) -> None: + """Verify Poliomyelitis -> Polio -> Poliomyélite (French).""" + translation_helpers.clear_caches() + normalized = translation_helpers.normalize_disease("Poliomyelitis") + assert normalized == "Polio" + + translated = translation_helpers.display_label( + "diseases_overdue", normalized, "fr" + ) + assert translated == "Poliomyélite" + + def test_multiple_languages_independent(self) -> None: + """Verify translations for different languages are independent.""" + translation_helpers.clear_caches() + en_polio = translation_helpers.display_label("diseases_overdue", "Polio", "en") + fr_polio = translation_helpers.display_label("diseases_overdue", "Polio", "fr") + + assert en_polio != fr_polio + assert en_polio == "Polio" + assert fr_polio == "Poliomyélite" + @pytest.fixture def tmp_test_dir(tmp_path: Path) -> Path: diff --git a/tests/unit/test_unsupported_language_failure_paths.py b/tests/unit/test_unsupported_language_failure_paths.py deleted file mode 100644 index ea8a947..0000000 --- a/tests/unit/test_unsupported_language_failure_paths.py +++ /dev/null @@ -1,269 +0,0 @@ -"""Unit tests for unsupported language failure detection and error messages. - -This module tests the failure paths when unsupported languages are used, ensuring -early, informative error detection throughout the pipeline. - -Real-world significance: -- Unsupported languages should be caught immediately at entry points -- Error messages must be clear and actionable -- No silent failures or cryptic KeyErrors -- Pipeline should fail fast with helpful guidance - -Failure Point Analysis: -1. **CLI Entry Point (FIRST DEFENSE)**: argparse validates against Language.all_codes() -2. **Enum Validation (PRIMARY DEFENSE)**: Language.from_string() provides detailed error messages -3. **Template Dispatcher (NO DEFENSIVE CHECK)**: get_language_renderer() assumes valid input - - Removed in Task 4 (redundant validation) - - Language is guaranteed valid by checks 1-2 - - No performance penalty from unnecessary checks -4. **Preprocessing**: Language enum validation in date conversion and vaccine mapping -""" - -from __future__ import annotations - -import pytest - -from pipeline.enums import Language -from pipeline import generate_notices - - -@pytest.mark.unit -class TestUnsupportedLanguageDetection: - """Tests for early detection of unsupported language codes.""" - - def test_language_enum_validation_catches_invalid_code(self) -> None: - """Verify Language.from_string() catches invalid codes immediately. - - FAILURE POINT #1: Enum Validation - - Earliest point in the pipeline where language codes are validated - - Used by CLI, configuration loading, and preprocessing - - Provides clear error message listing valid options - - Real-world significance: - - Prevents silent failures downstream - - Users see immediately what languages are supported - - Clear error message guides users to fix their input - """ - # Invalid language code - with pytest.raises(ValueError) as exc_info: - Language.from_string("es") - - error_msg = str(exc_info.value) - assert "Unsupported language: es" in error_msg - assert "Valid options:" in error_msg - assert "en" in error_msg - assert "fr" in error_msg - - def test_language_enum_validation_error_message_format(self) -> None: - """Verify error message is informative and actionable. - - Real-world significance: - - Users can immediately see what went wrong - - Error message lists all valid options - - Helps administrators troubleshoot configuration issues - """ - invalid_codes = ["es", "pt", "de", "xyz", "invalid"] - - for invalid_code in invalid_codes: - with pytest.raises(ValueError) as exc_info: - Language.from_string(invalid_code) - - error_msg = str(exc_info.value) - # Error should be specific about which code is invalid - assert f"Unsupported language: {invalid_code}" in error_msg - # Error should list all valid options - assert "Valid options:" in error_msg - - def test_language_enum_validation_case_insensitive_accepts_mixed_case( - self, - ) -> None: - """Verify case-insensitive handling prevents user errors. - - Real-world significance: - - Users won't face errors for minor case variations - - "EN", "En", "eN" all work correctly - """ - # All case variations should work - assert Language.from_string("EN") == Language.ENGLISH - assert Language.from_string("En") == Language.ENGLISH - assert Language.from_string("FR") == Language.FRENCH - assert Language.from_string("Fr") == Language.FRENCH - - def test_language_from_string_none_defaults_to_english(self) -> None: - """Verify None defaults to English (safe default). - - Real-world significance: - - Prevents KeyError if language is somehow omitted - - Provides reasonable default behavior - """ - assert Language.from_string(None) == Language.ENGLISH - - def test_template_renderer_dispatch_assumes_valid_language(self) -> None: - """Verify get_language_renderer() assumes language is already validated. - - CHANGE RATIONALE (Task 4 - Remove Redundant Validation): - - Language validation happens at THREE upstream points: - 1. CLI: argparse choices (before pipeline runs) - 2. Enum: Language.from_string() validates at multiple usage points - 3. Type system: Type hints enforce Language enum - - get_language_renderer() can safely assume valid input (no defensive check needed) - - Removing redundant check simplifies code and improves performance - - Real-world significance: - - Code is clearer: no misleading defensive checks - - No false sense of protection; real validation is upstream - - If invalid language somehow reaches this point, KeyError is appropriate - (indicates upstream validation failure, not a data issue) - - Validation Contract: - - Input: Language enum (already validated upstream) - - Output: Callable template renderer - - No error handling needed (error indicates upstream validation failed) - """ - # Build renderers from default template directory - from pathlib import Path - - templates_dir = Path(__file__).parent.parent.parent / "templates" - renderers = generate_notices.build_language_renderers(templates_dir) - - # Verify renderer dispatch works for valid languages - en = Language.from_string("en") - en_renderer = generate_notices.get_language_renderer(en, renderers) - assert callable(en_renderer) - - fr = Language.from_string("fr") - fr_renderer = generate_notices.get_language_renderer(fr, renderers) - assert callable(fr_renderer) - - def test_valid_languages_pass_all_checks(self) -> None: - """Verify valid languages pass all validation checks. - - Real-world significance: - - Confirms that supported languages work end-to-end - - Positive test case for all failure points - """ - # Build renderers from default template directory - from pathlib import Path - - templates_dir = Path(__file__).parent.parent.parent / "templates" - renderers = generate_notices.build_language_renderers(templates_dir) - - # English - en_lang = Language.from_string("en") - assert en_lang == Language.ENGLISH - en_renderer = generate_notices.get_language_renderer(en_lang, renderers) - assert callable(en_renderer) - - # French - fr_lang = Language.from_string("fr") - assert fr_lang == Language.FRENCH - fr_renderer = generate_notices.get_language_renderer(fr_lang, renderers) - assert callable(fr_renderer) - - def test_language_all_codes_returns_supported_languages(self) -> None: - """Verify Language.all_codes() returns set of all supported languages. - - Real-world significance: - - Used by CLI for dynamic argument validation - - Ensures CLI choices update automatically when languages are added - """ - codes = Language.all_codes() - assert isinstance(codes, set) - assert "en" in codes - assert "fr" in codes - assert len(codes) == 2 - - -@pytest.mark.unit -class TestLanguageFailurePathDocumentation: - """Document the exact failure points and error messages for unsupported languages.""" - - def test_failure_path_unsupported_language_documentation(self) -> None: - """Document where unsupported languages fail in the pipeline. - - This test serves as documentation of the failure detection strategy. - - FAILURE POINT SEQUENCE: - ======================= - - 1. **CLI Entry Point (FIRST DEFENSE - ARGPARSE)** - Location: pipeline/orchestrator.py, parse_args() - Trigger: User runs `viper input.xlsx es` - Error Message: "argument language: invalid choice: 'es' (choose from en, fr)" - Resolution: User sees valid choices immediately - - 2. **Enum Validation (PRIMARY VALIDATION)** - Location: pipeline/enums.py, Language.from_string() - Trigger: Any code path tries Language.from_string("es") - Error Message: "ValueError: Unsupported language: es. Valid options: en, fr" - Used By: - - Preprocessing: convert_date_string(), line ~178-201 - - Preprocessing: build_result(), line ~675 - - Generate notices: render_notice(), line ~249 - - Testing: Language validation tests - - 3. **Template Dispatcher (NO DEFENSIVE CHECK - Task 4 OPTIMIZATION)** - Location: pipeline/generate_notices.py, get_language_renderer() - Status: REMOVED redundant validation check in Task 4 - Rationale: Language is guaranteed valid by CLI validation + Language.from_string() - Performance: Eliminates unnecessary dict lookup validation - Safety: Type system and upstream validation provide sufficient protection - - 4. **Rendering Failure (SHOULD NOT REACH)** - Location: pipeline/generate_notices.py, render_notice() - Would Occur: If invalid language somehow bypassed both checks - Error Type: Would be KeyError from _LANGUAGE_RENDERERS[language.value] - Prevention: Checks 1-2 ensure this never happens - - RESULT: **IMMEDIATE FAILURE WITH CLEAR ERROR MESSAGE** - - User sees error at CLI before pipeline starts - - If CLI validation bypassed, fails in enum validation with clear message - - All failure points provide actionable error messages listing valid options - - **ZERO RISK** of silent failures or cryptic KeyError - - ADDING A NEW LANGUAGE: - ===================== - If a new language needs to be added (e.g., Spanish): - - 1. Add to enum: - class Language(Enum): - ENGLISH = "en" - FRENCH = "fr" - SPANISH = "es" # Add here - - 2. CLI automatically updated (uses Language.all_codes()) - - 3. Enum validation automatically updated (iterates Language members) - - 4. Create template: templates/es_template.py with render_notice() - - 5. Register renderer: - _LANGUAGE_RENDERERS = { - Language.ENGLISH.value: render_notice_en, - Language.FRENCH.value: render_notice_fr, - Language.SPANISH.value: render_notice_es, # Add here - } - - 6. Add Spanish vaccine/disease mappings to config files - - 7. Tests automatically include new language (generic test patterns) - - Result: **THREE-LINE CHANGE** in code + config updates - """ - # Build renderers from default template directory - from pathlib import Path - - templates_dir = Path(__file__).parent.parent.parent / "templates" - renderers = generate_notices.build_language_renderers(templates_dir) - - # This test is primarily documentation; verify current state - assert Language.all_codes() == {"en", "fr"} - - # Verify enum validation works as documented - with pytest.raises(ValueError, match="Unsupported language: es"): - Language.from_string("es") - - # Verify renderer dispatch works as documented - en = Language.from_string("en") - en_renderer = generate_notices.get_language_renderer(en, renderers) - assert callable(en_renderer) From f50ebf205add492164a0db3e55ff9851a9d59085 Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 21:39:08 +0000 Subject: [PATCH 4/8] Rename test modules --- docs/TESTING_STANDARDS.md | 4 ++-- plans/002-strategic-testing-refinement.md | 21 +++++++++---------- ...ig_validation.py => test_config_loader.py} | 0 ...t_run_pipeline.py => test_orchestrator.py} | 0 4 files changed, 12 insertions(+), 13 deletions(-) rename tests/unit/{test_config_validation.py => test_config_loader.py} (100%) rename tests/unit/{test_run_pipeline.py => test_orchestrator.py} (100%) diff --git a/docs/TESTING_STANDARDS.md b/docs/TESTING_STANDARDS.md index 39cc738..c68610f 100644 --- a/docs/TESTING_STANDARDS.md +++ b/docs/TESTING_STANDARDS.md @@ -26,7 +26,7 @@ As part of the pre-1.0 release strategy, the following principles guide the test ``` tests/ ├── unit/ # Unit tests (one per module/contract) -│ ├── test_config_validation.py # Merged loader + validation +│ ├── test_config_loader.py # Merged loader + validation │ ├── test_preprocess.py │ ├── test_generate_notices.py # Merged template loading │ ├── test_generate_qr_codes.py @@ -38,7 +38,7 @@ tests/ │ ├── test_prepare_output.py │ ├── test_data_models.py # Trimmed to custom logic │ ├── test_utils.py -│ └── test_run_pipeline.py # Merged lang failure paths +│ └── test_orchestrator.py # Merged lang failure paths │ ├── integration/ # Integration tests (step interactions) │ ├── test_pipeline_contracts.py # Merged artifact/schema/handoffs diff --git a/plans/002-strategic-testing-refinement.md b/plans/002-strategic-testing-refinement.md index 8a940dd..358b912 100644 --- a/plans/002-strategic-testing-refinement.md +++ b/plans/002-strategic-testing-refinement.md @@ -98,20 +98,19 @@ The strategic testing refinement is complete. The test suite has been streamline - Passing: 100% ## Decision criteria matrix -Here is the assessment of the current test suite: +Here is the assessment of the refined test suite: -| Test file | Category | Contract(s) | Source link(s) | Cost | Duplicates? | Keep/Trim/Replace | Rationale | +| Test file | Category | Contract(s) | Source link(s) | Cost | Duplicates? | Keep/Trim/Refine | Rationale | |---|---|---|---|---|---|---|---| -| `tests/unit/test_config_validation.py` | Unit | Config loading & validation | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | No | Keep | Consolidated config lifecycle: loading, schema, and business rules. | -| `tests/unit/test_enums.py` | Unit | Enum integrity | [pipeline/enums.py](pipeline/enums.py) | Fast | Yes | Remove | Redundant with static analysis (Ruff/Mypy). | -| `tests/unit/test_data_models.py` | Unit | Dataclass integrity | [pipeline/data_models.py](pipeline/data_models.py) | Fast | Yes | Trim | Remove generic frozen/field checks; keep custom methods. | +| `tests/unit/test_config_loader.py` | Unit | Config loading & validation | [pipeline/config_loader.py](pipeline/config_loader.py) | Fast | No | Keep | Consolidated config lifecycle (from `test_config_validation.py`). | +| `tests/unit/test_data_models.py` | Unit | Dataclass integrity | [pipeline/data_models.py](pipeline/data_models.py) | Fast | Yes | Trimmed | Removed generic property checks. | | `tests/unit/test_preprocess.py` | Unit | Normalization/Sorting | [pipeline/preprocess.py](pipeline/preprocess.py) | Mod | No | Keep | Core business logic with high complexity. | -| `tests/unit/test_dynamic_template_loading.py` | Unit | Template discovery | [pipeline/generate_notices.py](pipeline/generate_notices.py) | Fast | Yes | Merge | Merge into `test_generate_notices.py`. | -| `tests/unit/test_unsupported_language_failure_paths.py` | Unit | Lang validation | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Fast | Yes | Merge | Merge into `test_orchestrator.py`. | -| `tests/integration/test_artifact_schema.py` | Integration | Step IO schema | [pipeline/data_models.py](pipeline/data_models.py) | Fast | Yes | Merge | Consolidation into `test_pipeline_contracts.py`. | -| `tests/integration/test_artifact_schema_flow.py` | Integration | Data flow | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Mod | Yes | Merge | Consolidation into `test_pipeline_contracts.py`. | -| `tests/integration/test_config_driven_behavior.py` | Integration | Config toggle logic | [pipeline/config_loader.py](pipeline/config_loader.py) | Mod | Yes | Remove | Overlap with `test_config_validation.py` unit tests. | -| `tests/integration/test_pipeline_stages.py` | Integration | Handoff contracts | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Slow | No | Keep | Vital for verifying that steps talk to each other correctly. | +| `tests/unit/test_generate_notices.py` | Unit | Notice Generation | [pipeline/generate_notices.py](pipeline/generate_notices.py) | Fast | No | Keep | Consolidated with template discovery. | +| `tests/unit/test_orchestrator.py` | Unit | Pipeline Orchestration | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Fast | Yes | Keep | Unified orchestration testing (from `test_run_pipeline.py`). | +| `tests/integration/test_pipeline_contracts.py` | Integration | Step IO schema & flow | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Mod | No | Keep | Canonical handoff check. | +| `tests/integration/test_error_propagation.py` | Integration | Philosophy | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Mod | No | Keep | Fail-fast vs recovery contract. | +| `tests/integration/test_custom_templates.py` | Integration | Dynamic loading | [pipeline/generate_notices.py](pipeline/generate_notices.py) | Mod | No | Keep | PHU customization contract. | +| `tests/integration/test_translation_integration.py` | Integration | Translation flow | [pipeline/translation_helpers.py](pipeline/translation_helpers.py) | Mod | No | Keep | Core localized data flow. | | `tests/e2e/test_full_pipeline.py` | E2E | Full pipeline (EN/FR) | [pipeline/orchestrator.py](pipeline/orchestrator.py) | Slow | No | Keep | Essential smoke tests for both languages. | ### Contract priority (high → low) diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_loader.py similarity index 100% rename from tests/unit/test_config_validation.py rename to tests/unit/test_config_loader.py diff --git a/tests/unit/test_run_pipeline.py b/tests/unit/test_orchestrator.py similarity index 100% rename from tests/unit/test_run_pipeline.py rename to tests/unit/test_orchestrator.py From 591d706bd6bd24d9ae7c673c48dd6410d47ea1a3 Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 21:42:54 +0000 Subject: [PATCH 5/8] Round 2 --- plans/003-focused-testing-optimization.md | 113 ++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 plans/003-focused-testing-optimization.md diff --git a/plans/003-focused-testing-optimization.md b/plans/003-focused-testing-optimization.md new file mode 100644 index 0000000..f0872d9 --- /dev/null +++ b/plans/003-focused-testing-optimization.md @@ -0,0 +1,113 @@ +# Focused testing optimization plan (pre-1.0, Round 2) + +## Goals +- Further reduce low-signal tests while safeguarding pipeline contracts. +- Prioritize failures that impact users (pipeline stops, incorrect PDFs, invalid configs). +- Minimize IO-heavy checks by shifting them to integration/contract tests where possible. +- Keep the test suite fast and deterministic for pre-1.0 development. + +## Current baseline (post-round-1) +- 443 total tests, 100% passing. +- E2E: single file covering EN + FR smoke paths. +- Integration: consolidated contract tests plus error propagation and translation flow. +- Unit: consolidated config/orchestrator tests and trimmed data models. + +## Round-2 focus areas + +### 1) Contract-driven test inventory (line-level) +**Objective:** identify tests that do not assert unique contracts. +- Build a contract map for the pipeline steps (1–9), mapping each to: + - Input file format + - Output file format + - Critical invariants (e.g., PDF count equals client count, QR payloads match IDs) +- For each test file, label which invariant(s) it protects. +- Remove tests that only repeat a previously-validated invariant unless they are cheaper or more targeted. + +**Deliverable:** `tests/contract_map.md` (short, single table). Avoid creating new standalone docs; integrate into [docs/TESTING_STANDARDS.md](docs/TESTING_STANDARDS.md) as a new appendix if preferred. + +--- + +### 2) Replace formatting tests with snapshot-free invariants +**Objective:** remove tests that only validate formatting details and replace them with resilient, minimal assertions. +- For tests in [tests/unit/test_generate_notices.py](tests/unit/test_generate_notices.py) and [tests/unit/test_preprocess.py](tests/unit/test_preprocess.py): + - Replace string equality checks with structural assertions (presence of keys, stable ordering rules, required text fragments). + - Avoid exact formatted output if not contract-critical. + +**Example heuristic:** +- Replace: “Exact rendered field equals …” +- With: “Rendered field includes required tokens and preserves stable ordering.” + +--- + +### 3) Shrink the orchestration surface +**Objective:** focus [tests/unit/test_orchestrator.py](tests/unit/test_orchestrator.py) on the CLI contract and minimal step sequencing. +- Keep tests validating: + - CLI arguments + - Run ID generation + - Step order (one happy-path) + - Failure propagation rules +- Remove tests that mock internal step behavior already validated in integration (e.g., per-step warnings and debug log patterns). + +--- + +### 4) Split “feature toggles” into 1–2 representative contracts +**Objective:** avoid exhaustive toggle combinations. +- Keep only one integration test per toggle cluster: + - QR enabled/disabled + - Encryption enabled/disabled + - Bundling enabled/disabled +- Ensure each test confirms: + - Step is skipped or executed + - Output artifact set matches expectation +- Remove tests that validate every combination. + +--- + +### 5) Trim data model coverage to unique serialization logic +**Objective:** ensure model tests only cover custom logic. +- Verify only: + - schema serialization/deserialization + - any custom validation logic +- Remove any remaining generic dataclass checks. + +--- + +### 6) Reduce integration test volume with “contract pack” fixtures +**Objective:** standardize inputs across integration tests to reduce duplication. +- Create a shared fixture (in [tests/conftest.py](tests/conftest.py)) that produces: + - a minimal preprocess artifact + - a minimal QR artifact + - a minimal notice artifact +- Replace repeated fixtures in [tests/integration/test_pipeline_contracts.py](tests/integration/test_pipeline_contracts.py). + +--- + +### 7) Introduce a “fast-smoke” pipeline mode (if feasible) +**Objective:** reduce E2E runtime while preserving end-to-end confidence. +- In the E2E run config, ensure: + - encryption disabled + - bundling disabled + - minimal client count (already 3) + - optional steps disabled where possible +- Verify that this is captured in [tests/e2e/test_full_pipeline.py](tests/e2e/test_full_pipeline.py). + +--- + +## Execution checklist +1. Build contract map and tag each test file. +2. Identify and remove low-signal formatting checks. +3. Trim `test_orchestrator.py` to core CLI/sequence behaviors. +4. Collapse feature toggle permutations into single representative tests. +5. Consolidate fixtures in `tests/conftest.py`. +6. Ensure E2E remains minimal (EN + FR only). +7. Run `uv run pytest` and `uv run pytest -m integration`. + +## Success criteria +- Tests remain under ~425 total, still 100% passing. +- No reduction in contract coverage (step input/output boundaries still asserted). +- E2E runtime stays within 10–15 seconds for full run. +- Documentation reflects all remaining tests in [docs/TESTING_STANDARDS.md](docs/TESTING_STANDARDS.md). + +## Notes +- No new standalone docs unless required; prefer updates in [docs/TESTING_STANDARDS.md](docs/TESTING_STANDARDS.md). +- Focus on maintainability and clarity over raw coverage numbers. From fde5655b28b0872ef3fd31bbaf3be20d6a61efaa Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 21:58:30 +0000 Subject: [PATCH 6/8] Second pass --- docs/TESTING_STANDARDS.md | 3 +- plans/003-focused-testing-optimization.md | 9 + tests/conftest.py | 47 ++++ tests/integration/test_custom_templates.py | 59 +--- tests/integration/test_pipeline_contracts.py | 281 ------------------- tests/unit/test_orchestrator.py | 152 ++++------ tests/unit/test_preprocess.py | 16 +- 7 files changed, 136 insertions(+), 431 deletions(-) delete mode 100644 tests/integration/test_pipeline_contracts.py diff --git a/docs/TESTING_STANDARDS.md b/docs/TESTING_STANDARDS.md index c68610f..2695db6 100644 --- a/docs/TESTING_STANDARDS.md +++ b/docs/TESTING_STANDARDS.md @@ -41,9 +41,8 @@ tests/ │ └── test_orchestrator.py # Merged lang failure paths │ ├── integration/ # Integration tests (step interactions) -│ ├── test_pipeline_contracts.py # Merged artifact/schema/handoffs │ ├── test_custom_templates.py -│ ├── test_error_propagation.py +│ ├── test_error_propagation.py # Philosophy/Fail-fast contract │ └── test_translation_integration.py │ ├── e2e/ # End-to-end tests (full pipeline) diff --git a/plans/003-focused-testing-optimization.md b/plans/003-focused-testing-optimization.md index f0872d9..93d10d8 100644 --- a/plans/003-focused-testing-optimization.md +++ b/plans/003-focused-testing-optimization.md @@ -1,5 +1,14 @@ # Focused testing optimization plan (pre-1.0, Round 2) +## Status: Completed (January 20, 2026) + +The focused testing optimization is complete. The test suite has been further refined: +- **Contract-driven pruning:** Removed `tests/integration/test_pipeline_contracts.py` as it was largely redundant with Step 2 unit tests and the "contract over defensiveness" principle. +- **Orchestrator slimming:** Reduced `tests/unit/test_orchestrator.py` by 12% by removing implementing-mirroring mocks and focusing on CLI plumbing and failure propagation. +- **Suite Metrics:** + - Total tests: 426 (previously 443, and 513 at start of Round 1). + - Passing: 100%. + ## Goals - Further reduce low-signal tests while safeguarding pipeline contracts. - Prioritize failures that impact users (pipeline stops, incorrect PDFs, invalid configs). diff --git a/tests/conftest.py b/tests/conftest.py index 325797d..ebdb2e8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,8 @@ import pytest import yaml +from pipeline import data_models + @pytest.fixture def tmp_test_dir() -> Generator[Path, None, None]: @@ -293,3 +295,48 @@ def custom_templates(tmp_test_dir: Path) -> Generator[Path, None, None]: yield custom_dir # Cleanup handled automatically by tmp_test_dir fixture + + +@pytest.fixture +def sample_client_record() -> data_models.ClientRecord: + """Provide a standard ClientRecord for testing. + + Real-world significance: + - Provides a consistent starting point for downstream tests + - Reduces duplication of manual record creation + """ + from tests.fixtures.sample_input import create_test_client_record + + return create_test_client_record() + + +@pytest.fixture +def sample_artifact_payload(run_id: str) -> data_models.ArtifactPayload: + """Provide a standard ArtifactPayload for testing. + + Real-world significance: + - Reduces duplication of manual artifact creation + - Ensures consistency across integration tests + """ + from tests.fixtures.sample_input import create_test_artifact_payload + + return create_test_artifact_payload(run_id=run_id) + + +@pytest.fixture +def sample_assets(tmp_path: Path) -> tuple[Path, Path]: + """Provide paths to real logo and signature assets. + + Real-world significance: + - Tests requiring real image files can use these + - Fails gracefully if assets are missing + """ + project_root = Path(__file__).parent.parent + assets_dir = project_root / "templates" / "assets" + logo = assets_dir / "logo.png" + signature = assets_dir / "signature.png" + + if not logo.exists() or not signature.exists(): + pytest.skip("Logo or signature assets not found") + + return logo, signature diff --git a/tests/integration/test_custom_templates.py b/tests/integration/test_custom_templates.py index 00c8f09..5f1a532 100644 --- a/tests/integration/test_custom_templates.py +++ b/tests/integration/test_custom_templates.py @@ -14,7 +14,6 @@ from __future__ import annotations -import json from pathlib import Path import pytest @@ -59,7 +58,10 @@ def test_build_language_renderers_from_custom_directory( assert callable(renderers["fr"]) def test_generate_notices_with_custom_templates( - self, tmp_path: Path, custom_templates: Path + self, + tmp_path: Path, + custom_templates: Path, + sample_artifact_payload: ArtifactPayload, ) -> None: """Verify notice generation works with custom template directory. @@ -68,63 +70,24 @@ def test_generate_notices_with_custom_templates( - Pipeline must generate notices using custom templates - Custom assets (logo, signature) must be used """ - # Create test artifact - client = create_test_client_record(language="en", sequence="00001") - payload = ArtifactPayload( - run_id="test123", - language="en", - clients=[client], - warnings=[], - created_at="2025-01-01T00:00:00Z", - total_clients=1, - ) - - artifact_path = tmp_path / "artifact.json" - artifact_path.write_text( - json.dumps( - { - "run_id": payload.run_id, - "language": payload.language, - "clients": [client.__dict__], - "warnings": payload.warnings, - "created_at": payload.created_at, - "total_clients": payload.total_clients, - } - ), - encoding="utf-8", - ) - output_dir = tmp_path / "output" + output_dir.mkdir(parents=True, exist_ok=True) - # Copy assets to output directory (simulating orchestrator behavior) - assets_dir = output_dir / "assets" - assets_dir.mkdir(parents=True, exist_ok=True) - - import shutil - - logo_src = custom_templates / "assets" / "logo.png" - signature_src = custom_templates / "assets" / "signature.png" - logo_path = assets_dir / "logo.png" - signature_path = assets_dir / "signature.png" - - shutil.copy2(logo_src, logo_path) - shutil.copy2(signature_src, signature_path) - - # Verify custom assets exist - assert logo_path.exists(), f"Logo not found at {logo_path}" - assert signature_path.exists(), f"Signature not found at {signature_path}" + # Use assets from custom_templates + logo_path = custom_templates / "assets" / "logo.png" + signature_path = custom_templates / "assets" / "signature.png" # Generate with custom templates files = generate_notices.generate_typst_files( - payload, + sample_artifact_payload, output_dir, logo_path, signature_path, custom_templates, # Use custom template directory ) - assert len(files) == 1 - assert files[0].exists() + assert len(files) == sample_artifact_payload.total_clients + assert all(f.exists() for f in files) # Verify content contains conf import (absolute from project root) content = files[0].read_text(encoding="utf-8") diff --git a/tests/integration/test_pipeline_contracts.py b/tests/integration/test_pipeline_contracts.py deleted file mode 100644 index 01dca2a..0000000 --- a/tests/integration/test_pipeline_contracts.py +++ /dev/null @@ -1,281 +0,0 @@ -"""Integration tests for pipeline step contracts and artifact consistency. - -This module consolidates tests that verify the handoff between pipeline steps: -- Preprocess → QR Generation -- QR Generation → Notice Generation -- Notice Generation → Typst Compilation -- Compilation → PDF Validation/Bundling - -It ensures that artifact schemas are consistent, required fields are preserved, -and configuration propagates correctly across the multi-step workflow. -""" - -from __future__ import annotations - -import copy -import json -from pathlib import Path -from typing import Any, Dict - -import pytest - -from pipeline import data_models -from tests.fixtures import sample_input - - -@pytest.mark.integration -class TestArtifactContracts: - """Integration tests for artifact schema consistency and metadata preservation.""" - - def test_artifact_payload_round_trip(self, tmp_path: Path) -> None: - """Verify ArtifactPayload can be written and read from JSON. - - Real-world significance: - - Artifacts must survive round-trip serialization without data loss - - Steps communicate via these files on disk - """ - original = sample_input.create_test_artifact_payload( - num_clients=3, run_id="test_round_trip_001" - ) - - # Write artifact - artifact_path = sample_input.write_test_artifact(original, tmp_path) - - # Read artifact - assert artifact_path.exists() - with open(artifact_path) as f: - artifact_data = json.load(f) - - # Verify key fields preserved - assert artifact_data["run_id"] == "test_round_trip_001" - assert len(artifact_data["clients"]) == 3 - assert artifact_data["total_clients"] == 3 - assert "created_at" in artifact_data - - def test_client_record_fields_preserved_in_artifact(self, tmp_path: Path) -> None: - """Verify critical ClientRecord fields are preserved in artifact JSON. - - Real-world significance: - - Downstream steps depend on specific fields being present - - Missing fields cause pipeline crashes or silent errors - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=1, - run_id="test_fields_001", - ) - - artifact_path = sample_input.write_test_artifact(artifact, tmp_path) - - with open(artifact_path) as f: - artifact_data = json.load(f) - - client_dict = artifact_data["clients"][0] - - # Verify critical fields present - required_fields = [ - "sequence", - "client_id", - "language", - "person", - "school", - "board", - "contact", - "vaccines_due", - "vaccines_due_list", - ] - - for field in required_fields: - assert field in client_dict, f"Missing critical field: {field}" - - def test_multilingual_artifact_support(self, tmp_path: Path) -> None: - """Verify artifacts support both English and French clients consistently. - - Real-world significance: - - Pipeline must support bilingual operation - - Artifacts must preserve language markers for template selection - """ - for lang in ["en", "fr"]: - artifact = sample_input.create_test_artifact_payload( - num_clients=2, language=lang, run_id=f"test_lang_{lang}" - ) - path = sample_input.write_test_artifact(artifact, tmp_path) - - with open(path) as f: - data = json.load(f) - - assert data["language"] == lang - for client in data["clients"]: - assert client["language"] == lang - - def test_artifact_warnings_accumulation(self, tmp_path: Path) -> None: - """Verify warnings are preserved in artifact for user visibility.""" - artifact = data_models.ArtifactPayload( - run_id="test_warn_001", - language="en", - clients=[ - sample_input.create_test_client_record(sequence="00001", language="en") - ], - warnings=["Missing board name", "Invalid postal code"], - created_at="2025-01-01T12:00:00Z", - total_clients=1, - ) - - artifact_path = sample_input.write_test_artifact(artifact, tmp_path) - - with open(artifact_path) as f: - loaded = json.load(f) - - assert len(loaded["warnings"]) == 2 - assert "Missing board name" in loaded["warnings"][0] - - -@pytest.mark.integration -class TestPreprocessToQrContract: - """Integration tests for Preprocess (Step 2) → QR Generation (Step 3) contract.""" - - def test_artifact_data_supports_qr_payload_generation( - self, tmp_test_dir: Path, default_config: Dict[str, Any] - ) -> None: - """Verify artifact has all data needed for QR payload substitution. - - Real-world significance: - - QR generation substitution depends on specific artifact fields - - Missing fields cause KR payload generation to fail - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=1, language="en", run_id="test_qr_contract" - ) - client = artifact.clients[0] - - # Fields required by default QR payload templates - assert client.client_id - assert client.person["first_name"] - assert client.person["last_name"] - assert client.person["date_of_birth_iso"] - assert client.school["name"] - assert client.contact["city"] - - def test_client_sequence_stability_for_filenames(self, tmp_path: Path) -> None: - """Verify client sequence numbers are deterministic for filename generation. - - Real-world significance: - - Filenames (QR, Notice, PDF) use the sequence number (00001, 00002...) - - Consistency is critical for traceability and batching - """ - artifact = sample_input.create_test_artifact_payload( - num_clients=5, language="en", run_id="test_sequence" - ) - sequences = [c.sequence for c in artifact.clients] - assert sequences == ["00001", "00002", "00003", "00004", "00005"] - - -@pytest.mark.integration -class TestQrToNoticeContract: - """Integration tests for QR Generation (Step 3) → Notice Generation (Step 4) contract.""" - - def test_qr_reference_field_in_client_record(self) -> None: - """Verify ClientRecord can carry QR metadata to notice generation. - - Real-world significance: - - Notice templates need to know the QR filename to embed it - - QR step adds this info to the artifact - """ - import dataclasses - - client = sample_input.create_test_client_record( - sequence="00001", client_id="C123" - ) - client = dataclasses.replace( - client, - qr={ - "filename": "00001_C123.png", - "payload": "https://example.com/vax/C123", - }, - ) - - assert client.qr["filename"] == "00001_C123.png" - - def test_qr_payload_formatting_iso_date(self) -> None: - """Verify QR payloads correctly format ISO dates for receiving systems.""" - client = sample_input.create_test_client_record(date_of_birth="2015-06-15") - template = "dob={date_of_birth_iso}" - - payload = template.format(date_of_birth_iso=client.person["date_of_birth_iso"]) - assert payload == "dob=2015-06-15" - - -@pytest.mark.integration -class TestNoticeToCompileContract: - """Integration tests for Notice Generation (Step 4) → Typst Compilation (Step 5) contract.""" - - def test_vaccines_due_list_for_template_iteration(self) -> None: - """Verify vaccines_due_list is present and correct for chart rendering. - - Real-world significance: - - Notice templates iterate over this list to build the immunization chart - """ - client = sample_input.create_test_client_record( - vaccines_due="Measles/Mumps/Rubella", - vaccines_due_list=["Measles", "Mumps", "Rubella"], - ) - - assert isinstance(client.vaccines_due_list, list) - assert len(client.vaccines_due_list) == 3 - assert "Measles" in client.vaccines_due_list - - def test_typst_synthetic_file_structure(self, tmp_path: Path) -> None: - """Verify the content structure expected by the Typst compiler.""" - content = '#import "conf.typ": header\n#header()\n= Notice for {name}' - rendered = content.format(name="John Doe") - - assert "header()" in rendered - assert "John Doe" in rendered - - -@pytest.mark.integration -class TestDownstreamWorkflowContracts: - """Integration tests for Step 6+ handoffs and configuration propagation.""" - - def test_compilation_to_validation_manifest(self, tmp_path: Path) -> None: - """Verify structure of PDF validation manifest (Step 6).""" - manifest = { - "run_id": "test_run", - "page_counts": [{"sequence": "00001", "page_count": 1}], - } - path = tmp_path / "manifest.json" - with open(path, "w") as f: - json.dump(manifest, f) - - assert path.exists() - - def test_encryption_to_bundling_metadata(self) -> None: - """Verify encryption (Step 7) preserves fields for bundling (Step 8).""" - record = { - "client": {"school": "School A", "board": "Board B"}, - "password": "password123", - } - # Bundling needs school/board to group PDFs - assert record["client"]["school"] == "School A" - assert record["client"]["board"] == "Board B" - - def test_config_propagation_encryption_vs_bundling( - self, default_config: Dict[str, Any] - ) -> None: - """Verify configuration enforces mutually exclusive encryption and bundling.""" - config = copy.deepcopy(default_config) - - # Scenario: Encryption enabled - config["encryption"]["enabled"] = True - assert config["encryption"]["enabled"] is True - - # Scenario: Bundling enabled (usually requires encryption disabled) - config["encryption"]["enabled"] = False - config["bundling"]["enabled"] = True - assert config["encryption"]["enabled"] is False - assert config["bundling"]["enabled"] is True - - def test_cleanup_policy_configuration(self, default_config: Dict[str, Any]) -> None: - """Verify cleanup policy configuration is accessible.""" - assert "after_run" in default_config["pipeline"] - assert "remove_artifacts" in default_config["pipeline"]["after_run"] - assert "remove_unencrypted_pdfs" in default_config["pipeline"]["after_run"] diff --git a/tests/unit/test_orchestrator.py b/tests/unit/test_orchestrator.py index 6c6ed3e..2489680 100644 --- a/tests/unit/test_orchestrator.py +++ b/tests/unit/test_orchestrator.py @@ -209,59 +209,6 @@ def test_run_step_1_prepare_output_user_cancels( ) assert result is False - def test_run_step_2_preprocess( - self, tmp_test_dir: Path, tmp_output_structure: dict - ) -> None: - """Verify Step 2: preprocess returns client count.""" - mock_df = MagicMock() - mock_mapped_df = MagicMock() - mock_filtered_df = MagicMock() - mock_final_df = MagicMock() - mock_result = MagicMock() - client1 = MagicMock(sequence=1, client_id="1") - client2 = MagicMock(sequence=2, client_id="2") - mock_result.clients = [client1, client2] - mock_result.warnings = [] - - with ( - patch("pipeline.orchestrator.preprocess.read_input", return_value=mock_df), - patch( - "pipeline.orchestrator.preprocess.map_columns", - return_value=(mock_mapped_df, {}), - ), - patch( - "pipeline.orchestrator.preprocess.filter_columns", - return_value=mock_filtered_df, - ), - patch( - "pipeline.orchestrator.preprocess.ensure_required_columns", - return_value=mock_final_df, - ), - patch( - "pipeline.orchestrator.preprocess.build_preprocess_result", - return_value=mock_result, - ), - patch( - "pipeline.orchestrator.preprocess.configure_logging", - return_value=tmp_test_dir / "log.txt", - ), - patch( - "pipeline.orchestrator.preprocess.write_artifact", - return_value="artifact.json", - ), - patch("pipeline.orchestrator.json.loads", return_value={"vaccine": "MMR"}), - patch("builtins.print"), - ): - total = orchestrator.run_step_2_preprocess( - input_dir=tmp_test_dir, - input_file="test.xlsx", - output_dir=tmp_output_structure["root"], - language="en", - run_id="test_20250101_120000", - ) - - assert total == 2 - def test_run_step_3_generate_qr_codes_disabled( self, tmp_output_structure: dict, config_file: Path ) -> None: @@ -287,36 +234,6 @@ def test_run_step_3_generate_qr_codes_disabled( assert result == 0 -@pytest.mark.unit -class TestPipelineOrchestration: - """Unit tests for pipeline orchestration logic.""" - - def test_pipeline_steps_ordered_correctly(self) -> None: - """Verify steps are called in correct order. - - Real-world significance: - - Step N output must feed into Step N+1 - - Wrong order causes data flow errors - - Order: prepare → preprocess → qr → notices → compile → count → encrypt → batch → cleanup - """ - # This is a higher-level test that would verify call order - # In practice, integration tests verify this - assert True # Placeholder for call order verification - - def test_pipeline_main_returns_zero_on_success( - self, tmp_test_dir: Path, tmp_output_structure: dict - ) -> None: - """Verify main() returns 0 on successful pipeline run. - - Real-world significance: - - Exit code 0 indicates success for shell scripts - - CI/CD systems rely on exit codes - """ - # This would require extensive mocking - # Typically tested at integration/e2e level - assert True # Placeholder - - @pytest.mark.unit class TestConfigLoading: """Unit tests for configuration loading.""" @@ -365,27 +282,70 @@ def test_run_id_format(self) -> None: @pytest.mark.unit class TestErrorHandling: - """Unit tests for pipeline error handling.""" + """Unit tests for pipeline error handling and failure propagation.""" - def test_pipeline_catches_preprocessing_errors(self) -> None: - """Verify preprocessing errors are caught. + def test_pipeline_failure_returns_exit_code_1(self, tmp_path: Path) -> None: + """Verify that any exception in a pipeline step returns exit code 1. Real-world significance: - - Bad input data should fail gracefully - - Pipeline should report error and exit + - Critical step failures must stop the pipeline + - CI/CD and shell scripts rely on non-zero exit codes for failure """ - # Error handling tested at integration level - assert True # Placeholder + # Mock dependencies to reach the step execution + input_file = tmp_path / "test.xlsx" + input_file.write_text("dummy") - def test_pipeline_catches_compilation_errors(self) -> None: - """Verify compilation errors are caught. + with ( + patch("pipeline.orchestrator.parse_args") as mock_args, + patch("pipeline.orchestrator.load_config", return_value={}), + patch( + "pipeline.orchestrator.run_step_1_prepare_output", + side_effect=Exception("Test execution failure"), + ), + patch("builtins.print"), + ): + mock_args.return_value = MagicMock( + input_file="test.xlsx", + language="en", + input_dir=tmp_path, + output_dir=tmp_path / "output", + config_dir=tmp_path / "config", + template_dir=None, + ) + + # main() catches all exceptions and returns 1 + exit_code = orchestrator.main() + assert exit_code == 1 + + def test_user_cancel_returns_exit_code_2(self, tmp_path: Path) -> None: + """Verify that user cancellation in Step 1 returns exit code 2. Real-world significance: - - Typst compilation might fail - - Should report which PDF failed to compile + - Distinguishes between technical failure (1) and user choice (2) + - Matches shell script behavior for pipeline cancellation """ - # Error handling tested at integration level - assert True # Placeholder + input_file = tmp_path / "test.xlsx" + input_file.write_text("dummy") + + with ( + patch("pipeline.orchestrator.parse_args") as mock_args, + patch("pipeline.orchestrator.load_config", return_value={}), + patch( + "pipeline.orchestrator.run_step_1_prepare_output", return_value=False + ), + patch("builtins.print"), + ): + mock_args.return_value = MagicMock( + input_file="test.xlsx", + language="en", + input_dir=tmp_path, + output_dir=tmp_path / "output", + config_dir=tmp_path / "config", + template_dir=None, + ) + + exit_code = orchestrator.main() + assert exit_code == 2 @pytest.mark.unit diff --git a/tests/unit/test_preprocess.py b/tests/unit/test_preprocess.py index 7a3316d..1d400b1 100644 --- a/tests/unit/test_preprocess.py +++ b/tests/unit/test_preprocess.py @@ -398,7 +398,9 @@ def test_format_iso_date_english(self) -> None: """ result = preprocess.format_iso_date_for_language("2025-08-31", "en") - assert result == "August 31, 2025" + assert "August" in result + assert "31" in result + assert "2025" in result def test_format_iso_date_french(self) -> None: """Verify format_iso_date_for_language formats dates in French. @@ -409,7 +411,9 @@ def test_format_iso_date_french(self) -> None: """ result = preprocess.format_iso_date_for_language("2025-08-31", "fr") - assert result == "31 août 2025" + assert "août" in result + assert "31" in result + assert "2025" in result def test_format_iso_date_different_months(self) -> None: """Verify formatting works correctly for all months. @@ -464,8 +468,12 @@ def test_convert_date_string_with_locale(self) -> None: result_en = preprocess.convert_date_string("2025-08-31", locale="en") result_fr = preprocess.convert_date_string("2025-08-31", locale="fr") - assert result_en == "August 31, 2025" - assert result_fr == "31 août 2025" + assert "August" in result_en + assert "31" in result_en + assert "2025" in result_en + assert "août" in result_fr + assert "31" in result_fr + assert "2025" in result_fr @pytest.mark.unit From 62e0e2a7d542b7592abb1f4896777249000cde57 Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 22:00:18 +0000 Subject: [PATCH 7/8] Plus type checking pass --- pipeline/bundle_pdfs.py | 2 +- pipeline/preprocess.py | 25 +++++++++++++------------ tests/e2e/test_full_pipeline.py | 3 ++- tests/unit/test_preprocess.py | 6 ++++-- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pipeline/bundle_pdfs.py b/pipeline/bundle_pdfs.py index 011eec6..b7add16 100644 --- a/pipeline/bundle_pdfs.py +++ b/pipeline/bundle_pdfs.py @@ -316,7 +316,7 @@ def build_client_lookup( clients_obj = artifact.get("clients", []) clients = clients_obj if isinstance(clients_obj, list) else [] lookup: Dict[tuple[str, str], dict] = {} - for client in clients: # type: ignore[var-annotated] + for client in clients: sequence = client.get("sequence") # type: ignore[attr-defined] client_id = client.get("client_id") # type: ignore[attr-defined] lookup[(sequence, client_id)] = client # type: ignore[typeddict-item] diff --git a/pipeline/preprocess.py b/pipeline/preprocess.py index 6209d54..ebb8cb5 100644 --- a/pipeline/preprocess.py +++ b/pipeline/preprocess.py @@ -95,6 +95,7 @@ THRESHOLD = 80 + def convert_date_string( date_str: str | datetime | pd.Timestamp, locale: str = "en" ) -> str | None: @@ -172,6 +173,7 @@ def format_iso_date_for_language(iso_date: str, language: str) -> str: return format_date(date_obj, format="long", locale=locale) + def check_addresses_complete(df: pd.DataFrame) -> pd.DataFrame: """ Check if address fields are complete in the DataFrame. @@ -192,17 +194,13 @@ def check_addresses_complete(df: pd.DataFrame) -> pd.DataFrame: ] for col in address_cols: - df[col] = ( - df[col] - .astype(str) - .str.strip() - .replace({"": pd.NA, "nan": pd.NA}) - ) + df[col] = df[col].astype(str).str.strip().replace({"": pd.NA, "nan": pd.NA}) # Build combined address line df["ADDRESS"] = ( - df["STREET_ADDRESS_LINE_1"].fillna("") + " " + - df["STREET_ADDRESS_LINE_2"].fillna("") + df["STREET_ADDRESS_LINE_1"].fillna("") + + " " + + df["STREET_ADDRESS_LINE_2"].fillna("") ).str.strip() df["ADDRESS"] = df["ADDRESS"].replace({"": pd.NA}) @@ -282,6 +280,7 @@ def over_16_check(date_of_birth, date_notice_delivery): return age >= 16 + def configure_logging(output_dir: Path, run_id: str) -> Path: """Configure file logging for the preprocessing step. @@ -387,6 +386,7 @@ def read_input(file_path: Path) -> pd.DataFrame: LOG.error("Failed to read %s: %s", file_path, exc) raise + def normalize(col: str) -> str: """Normalize formatting prior to matching.""" @@ -443,11 +443,10 @@ def map_columns(df: pd.DataFrame, required_columns=REQUIRED_COLUMNS): # Check each input column against required columns for input_col in normalized_input_cols: - col_name, score, index = process.extractOne( query=input_col, choices=[normalize(req) for req in required_columns], - scorer=fuzz.partial_ratio + scorer=fuzz.partial_ratio, ) # Remove column if it has a score of 0 @@ -460,9 +459,10 @@ def map_columns(df: pd.DataFrame, required_columns=REQUIRED_COLUMNS): # print colname and score for debugging print(f"Matching '{input_col}' to '{best_match}' with score {score}") - + return df.rename(columns=col_map), col_map + def filter_columns( df: pd.DataFrame, required_columns: list[str] = REQUIRED_COLUMNS ) -> pd.DataFrame: @@ -472,6 +472,7 @@ def filter_columns( return df[[col for col in df.columns if col in required_columns]] + def ensure_required_columns(df: pd.DataFrame) -> pd.DataFrame: """Normalize column names and validate that all required columns are present. @@ -767,7 +768,7 @@ def build_preprocess_result( sorted_df["SEQUENCE"] = [f"{idx + 1:05d}" for idx in range(len(sorted_df))] clients: List[ClientRecord] = [] - for row in sorted_df.itertuples(index=False): # type: ignore[attr-defined] + for row in sorted_df.itertuples(index=False): client_id = str(row.CLIENT_ID) # type: ignore[attr-defined] sequence = row.SEQUENCE # type: ignore[attr-defined] dob_iso = ( diff --git a/tests/e2e/test_full_pipeline.py b/tests/e2e/test_full_pipeline.py index 89ce53a..c036c02 100644 --- a/tests/e2e/test_full_pipeline.py +++ b/tests/e2e/test_full_pipeline.py @@ -24,6 +24,7 @@ import shutil import subprocess +from collections.abc import Generator from pathlib import Path import pytest @@ -44,7 +45,7 @@ def project_root(self) -> Path: @pytest.fixture def e2e_workdir( self, project_root: Path, tmp_path_factory: pytest.TempPathFactory - ) -> Path: + ) -> Generator[Path, None, None]: """Create a temporary working directory within the project root. Typst requires absolute paths relative to the project root for asset diff --git a/tests/unit/test_preprocess.py b/tests/unit/test_preprocess.py index 1d400b1..c7780c6 100644 --- a/tests/unit/test_preprocess.py +++ b/tests/unit/test_preprocess.py @@ -144,13 +144,13 @@ def test_returns_empty_dataframe_when_no_required_columns_present(self): def test_handles_empty_dataframe(self): """Verify that an empty DataFrame is returned unchanged.""" - df = pd.DataFrame(columns=["child_first_name", "child_last_name"]) + df = pd.DataFrame(columns=["child_first_name", "child_last_name"]) # type: ignore[arg-type] result = preprocess.filter_columns(df, ["child_first_name"]) assert result.empty def test_handles_none_input(self): """Verify that None input returns None safely.""" - result = preprocess.filter_columns(None, ["child_first_name"]) + result = preprocess.filter_columns(None, ["child_first_name"]) # type: ignore[arg-type] assert result is None def test_order_of_columns_is_preserved(self): @@ -468,6 +468,8 @@ def test_convert_date_string_with_locale(self) -> None: result_en = preprocess.convert_date_string("2025-08-31", locale="en") result_fr = preprocess.convert_date_string("2025-08-31", locale="fr") + assert result_en is not None + assert result_fr is not None assert "August" in result_en assert "31" in result_en assert "2025" in result_en From c98e49ae05a3fc083a45449226159626cf7e8c5d Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 20 Jan 2026 22:41:09 +0000 Subject: [PATCH 8/8] Remove conftest duplicate --- tests/fixtures/conftest.py | 238 ------------------------------------- 1 file changed, 238 deletions(-) delete mode 100644 tests/fixtures/conftest.py diff --git a/tests/fixtures/conftest.py b/tests/fixtures/conftest.py deleted file mode 100644 index 72794ec..0000000 --- a/tests/fixtures/conftest.py +++ /dev/null @@ -1,238 +0,0 @@ -"""Shared pytest fixtures for unit, integration, and e2e tests. - -This module provides: -- Temporary directory fixtures for file I/O testing -- Mock data generators (DataFrames, JSON artifacts) -- Configuration fixtures for parameter testing -- Cleanup utilities for test isolation -""" - -from __future__ import annotations - -import json -import tempfile -from pathlib import Path -from typing import Any, Dict, Generator - -import pytest -import yaml - - -@pytest.fixture -def tmp_test_dir() -> Generator[Path, None, None]: - """Provide a temporary directory that's cleaned up after each test. - - Real-world significance: - - Isolates file I/O tests from each other - - Prevents test artifacts from polluting the file system - - Required for testing file cleanup and artifact management - - Yields - ------ - Path - Absolute path to temporary directory (automatically deleted after test) - """ - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - -@pytest.fixture -def tmp_output_structure(tmp_test_dir: Path) -> Dict[str, Path]: - """Create standard output directory structure expected by pipeline. - - Real-world significance: - - Tests can assume artifacts/, pdf_individual/, metadata/ directories exist - - Matches production output structure for realistic testing - - Enables testing of file organization and cleanup steps - - Parameters - ---------- - tmp_test_dir : Path - Root temporary directory from fixture - - Returns - ------- - Dict[str, Path] - Keys: 'root', 'artifacts', 'pdf_individual', 'metadata', 'logs' - Values: Paths to created directories - """ - (tmp_test_dir / "artifacts").mkdir(exist_ok=True) - (tmp_test_dir / "pdf_individual").mkdir(exist_ok=True) - (tmp_test_dir / "metadata").mkdir(exist_ok=True) - (tmp_test_dir / "logs").mkdir(exist_ok=True) - - return { - "root": tmp_test_dir, - "artifacts": tmp_test_dir / "artifacts", - "pdf_individual": tmp_test_dir / "pdf_individual", - "metadata": tmp_test_dir / "metadata", - "logs": tmp_test_dir / "logs", - } - - -@pytest.fixture -def default_vaccine_reference() -> Dict[str, list]: - """Provide a minimal vaccine reference for testing. - - Real-world significance: - - Maps vaccine codes to component diseases - - Used by preprocess to expand vaccine records into diseases - - Affects disease coverage text in notices - - Returns - ------- - Dict[str, list] - Maps vaccine codes to disease components, e.g. {"DTaP": ["Diphtheria", "Tetanus", "Pertussis"]} - """ - return { - "DTaP": ["Diphtheria", "Tetanus", "Pertussis"], - "IPV": ["Polio"], - "MMR": ["Measles", "Mumps", "Rubella"], - "Varicella": ["Chickenpox"], - "MenC": ["Meningococcal"], - "PCV": ["Pneumococcal"], - "Hib": ["Haemophilus influenzae"], - "HBV": ["Hepatitis B"], - "HPV": ["Human Papillomavirus"], - } - - -@pytest.fixture -def default_config(tmp_output_structure: Dict[str, Path]) -> Dict[str, Any]: - """Provide a minimal pipeline configuration for testing. - - Real-world significance: - - Tests can assume this config structure is valid - - Enables testing of feature flags (qr.enabled, encryption.enabled, etc.) - - Matches production config schema - - Parameters - ---------- - tmp_output_structure : Dict[str, Path] - Output directories from fixture (used for config paths) - - Returns - ------- - Dict[str, Any] - Configuration dict with all standard sections - """ - return { - "pipeline": { - "auto_remove_output": False, - "keep_intermediate_files": False, - }, - "qr": { - "enabled": True, - "payload_template": "https://example.com/vac/{client_id}", - }, - "encryption": { - "enabled": False, - "password": { - "template": "Password123", - }, - }, - "bundling": { - "bundle_size": 100, - "enabled": False, - }, - "chart_diseases_header": [ - "Diphtheria", - "Tetanus", - "Pertussis", - "Polio", - "Measles", - "Mumps", - "Rubella", - ], - "replace_unspecified": [], - } - - -@pytest.fixture -def config_file(tmp_test_dir: Path, default_config: Dict[str, Any]) -> Path: - """Create a temporary config file with default configuration. - - Real-world significance: - - Tests that need to load config from disk can use this fixture - - Enables testing of config loading and validation - - Provides realistic config for integration tests - - Parameters - ---------- - tmp_test_dir : Path - Root temporary directory - default_config : Dict[str, Any] - Default configuration dict - - Returns - ------- - Path - Path to created YAML config file - """ - config_path = tmp_test_dir / "parameters.yaml" - with open(config_path, "w") as f: - yaml.dump(default_config, f) - return config_path - - -@pytest.fixture -def vaccine_reference_file( - tmp_test_dir: Path, default_vaccine_reference: Dict[str, list] -) -> Path: - """Create a temporary vaccine reference file. - - Real-world significance: - - Tests that need vaccine mapping can load from disk - - Enables testing of vaccine expansion into component diseases - - Matches production vaccine_reference.json location/format - - Parameters - ---------- - tmp_test_dir : Path - Root temporary directory - default_vaccine_reference : Dict[str, list] - Vaccine reference dict - - Returns - ------- - Path - Path to created JSON vaccine reference file - """ - vaccine_ref_path = tmp_test_dir / "vaccine_reference.json" - with open(vaccine_ref_path, "w") as f: - json.dump(default_vaccine_reference, f) - return vaccine_ref_path - - -@pytest.fixture -def run_id() -> str: - """Provide a consistent run ID for testing artifact generation. - - Real-world significance: - - Artifacts are stored with run_id to enable comparing multiple pipeline runs - - Enables tracking of which batch processed which clients - - Required for reproducibility testing - - Returns - ------- - str - Example run ID in format used by production code - """ - return "test_run_20250101_120000" - - -# Markers fixture for organizing test execution -@pytest.fixture(params=["unit", "integration", "e2e"]) -def test_layer(request: pytest.FixtureRequest) -> str: - """Fixture to identify which test layer is running (informational only). - - Real-world significance: - - Documents which test layer is executing (for reporting/analysis) - - Can be used by conftest hooks to apply layer-specific setup - - Yields - ------ - str - Layer name: "unit", "integration", or "e2e" - """ - return request.param