165 changes: 54 additions & 111 deletions docs/TESTING_STANDARDS.md
@@ -6,6 +6,15 @@ This document defines the testing strategy and organizational standards for the

Tests are organized in three layers to provide different types of validation at different speeds.

+## Strategic Principles (Pre-1.0 Refinement)
+
+As part of the pre-1.0 release strategy, the following principles guide the test suite:
+
+1. **Contracts over Defensiveness:** Validate the boundaries between steps (their inputs and outputs on disk) rather than repeating internal logic checks in every module; see the sketch after this list.
+2. **Minimal E2E Smoke Tests:** Preserve exactly one full pipeline path per language (English and French) to prove end-to-end viability without overwhelming the suite with redundant E2E variants.
+3. **Consolidated Units:** Merge tests for tightly coupled modules (e.g., config loading and validation) to reduce file overhead and improve maintainability.
+4. **Deterministic and Isolated:** Tests must not rely on shared global state or external services, and all PDF compilation must run from the project root so that relative paths resolve correctly.
+
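As a concrete illustration of principle 1, a contract test validates the on-disk artifact that one step hands to the next rather than re-testing each step's internals. A minimal sketch only: `run_preprocess_step` and the artifact filename are hypothetical stand-ins for the project's real step runner, though the `sequence` and `client_id` keys mirror the lookup in `pipeline/bundle_pdfs.py`:

```python
import json
from pathlib import Path

import pytest


@pytest.mark.integration
def test_preprocess_artifact_contract(tmp_path: Path) -> None:
    """Contract: preprocess emits the artifact that downstream steps consume."""
    run_preprocess_step(output_dir=tmp_path)  # hypothetical step runner

    artifact = json.loads((tmp_path / "artifacts" / "clients.json").read_text())
    # Downstream steps key each client on (sequence, client_id).
    for client in artifact["clients"]:
        assert {"sequence", "client_id"} <= client.keys()
```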
## Frameworks and Metrics Used

`pytest` is used to write and run all tests for the codebase. Code coverage, the percentage of source code executed during testing, is the primary quality metric: `pytest-cov` reports the areas of the codebase that tests never execute, which is where bugs are most likely to hide. Coverage runs in our GitHub Actions whenever a pull request is opened, ensuring that new additions to the main codebase are tested.
@@ -16,33 +25,28 @@ Tests are organized in three layers to provide different types of validation at

```
tests/
-├── unit/                          # Unit tests (one per module)
-│   ├── test_config_loader.py
+├── unit/                          # Unit tests (one per module/contract)
+│   ├── test_config_loader.py      # Merged loader + validation
│   ├── test_preprocess.py
-│   ├── test_generate_notices.py
+│   ├── test_generate_notices.py   # Merged template loading
│   ├── test_generate_qr_codes.py
│   ├── test_compile_notices.py
│   ├── test_count_pdfs.py
│   ├── test_validate_pdfs.py
│   ├── test_encrypt_notice.py
│   ├── test_batch_pdfs.py
│   ├── test_bundle_pdfs.py
│   ├── test_cleanup.py
│   ├── test_prepare_output.py
│   ├── test_enums.py
-│   ├── test_data_models.py
+│   ├── test_data_models.py        # Trimmed to custom logic
│   ├── test_utils.py
-│   └── test_run_pipeline.py
+│   └── test_orchestrator.py       # Merged lang failure paths
├── integration/                   # Integration tests (step interactions)
-│   ├── test_pipeline_preprocess_to_qr.py
-│   ├── test_pipeline_notices_to_compile.py
-│   ├── test_pipeline_pdf_validation.py
-│   ├── test_artifact_schema.py
-│   └── test_config_driven_behavior.py
+│   ├── test_custom_templates.py
+│   ├── test_error_propagation.py  # Philosophy/Fail-fast contract
+│   └── test_translation_integration.py
├── e2e/                           # End-to-end tests (full pipeline)
-│   ├── test_full_pipeline_en.py
-│   ├── test_full_pipeline_fr.py
-│   └── test_pipeline_edge_cases.py
+│   └── test_full_pipeline.py      # EN and FR paths (smoke tests)
├── fixtures/                      # Shared test utilities
│   ├── conftest.py                # Pytest fixtures
@@ -166,106 +170,45 @@ def project_root() -> Path:
    return Path(__file__).parent.parent.parent  # tests/e2e/... → project root

@pytest.mark.e2e
-def test_full_pipeline_english(project_root: Path) -> None:
-    """E2E: Complete pipeline generates PDF output for English input.
-
-    Real-world significance:
-    - Verifies full 9-step pipeline works end-to-end
-    - Ensures PDF files are created with correct names and counts
-    - Tests English language variant (French tested separately)
-
-    Parameters
-    ----------
-    project_root : Path
-        Fixture providing absolute path to project root
-
-    Raises
-    ------
-    AssertionError
-        If pipeline exit code is non-zero or PDF count incorrect
-
-    Assertion: Pipeline succeeds and generates correct number of PDFs
-    """
-    input_dir = project_root / "input"
-    output_dir = project_root / "output"
-
-    input_file = input_dir / "e2e_test_clients.xlsx"
-    # Create test Excel file...
-
-    # Run pipeline with project_root as CWD (not tmp_path)
-    result = subprocess.run(
-        ["uv", "run", "viper", input_file.name, "en"],
-        cwd=str(project_root),
-        capture_output=True,
-        text=True
-    )
-
-    assert result.returncode == 0
-    pdfs = list((output_dir / "pdf_individual").glob("*.pdf"))
-    assert len(pdfs) == 3
-```
+class TestFullPipelineExecution:
+    """E2E: Complete pipeline generates PDF output for multiple languages."""
+
+    @pytest.fixture
+    def project_root(self) -> Path:
+        """Fixture providing absolute path to project root."""
+        return Path(__file__).resolve().parent.parent.parent
+
+    @pytest.fixture
+    def e2e_workdir(self, project_root: Path, tmp_path_factory: pytest.TempPathFactory) -> Path:
+        """Create a temporary workdir within project root for path resolution."""
+        workdir = project_root / f"tmp_e2e_{tmp_path_factory.mktemp('e2e').name}"
+        workdir.mkdir(parents=True, exist_ok=True)
+        # Prepare subdirs...
+        return workdir
+
+    def test_full_pipeline_english(self, project_root: Path, e2e_workdir: Path) -> None:
+        """Verifies full 9-step pipeline works end-to-end for English."""
+        # Run pipeline using 'uv run viper' in project_root...
+        pass
+```
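For reference, here is how the elided method body might adapt the deleted test above to the new fixtures. A sketch only: the `uv run viper <input> <lang>` invocation, the `pdf_individual` folder, and the three-PDF count come from the removed code, while placing outputs under the workdir is an assumption:

```python
import subprocess
from pathlib import Path


# Sketch of the method body for TestFullPipelineExecution.test_full_pipeline_english:
def test_full_pipeline_english(self, project_root: Path, e2e_workdir: Path) -> None:
    """Verifies full 9-step pipeline works end-to-end for English."""
    input_file = e2e_workdir / "e2e_test_clients.xlsx"
    # ... create the three-client test Excel file here ...

    # Run from the project root so relative paths resolve (principle 4).
    result = subprocess.run(
        ["uv", "run", "viper", str(input_file.relative_to(project_root)), "en"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
    )

    assert result.returncode == 0, result.stderr
    pdfs = list((e2e_workdir / "output" / "pdf_individual").glob("*.pdf"))
    assert len(pdfs) == 3  # one PDF per client in the fixture file
```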
-### Configuration Override Pattern for Feature Testing
+### Feature Toggle Testing (Config Overrides)

-**Solution:**
+While E2E tests are reserved for full smoke paths, feature toggles (e.g., QR enabled/disabled) should primarily be tested at the **Integration** level. This avoids the overhead of full PDF compilation for every configuration variant.
+
+**Preferred Pattern:**
+In `tests/integration/test_pipeline_contracts.py`, use a modular approach to verify that configuration flags correctly skip or include specific step logic.

```python
import yaml
from pathlib import Path

-@pytest.mark.e2e
-def test_pipeline_with_qr_disabled(project_root: Path) -> None:
-    """E2E: QR code generation can be disabled via config.
+@pytest.mark.integration
+def test_pipeline_skips_qr_when_disabled(tmp_path: Path):
+    """Integration: verify that 'qr.enabled=False' skips Step 3.

-    Real-world significance:
-    - Verifies feature flags in config actually control pipeline behavior
-    - Tests that disabled QR generation doesn't crash pipeline
-    - Ensures config-driven behavior is deterministic and testable
-
-    Parameters
-    ----------
-    project_root : Path
-        Fixture providing absolute path to project root
-
-    Raises
-    ------
-    AssertionError
-        If QR code generation is not skipped when disabled
-
-    Notes
-    -----
-    Always restores original config in finally block to prevent test pollution.
+    - Feature flags must be deterministic and respected
+    - Avoids unnecessary processing time when features aren't needed
    """
-    config_path = project_root / "config" / "parameters.yaml"
-
-    # Load original config
-    with open(config_path) as f:
-        original_config = yaml.safe_load(f)
-
-    try:
-        # Modify config
-        original_config["qr"]["enabled"] = False
-        with open(config_path, "w") as f:
-            yaml.dump(original_config, f)
-
-        # Run pipeline
-        result = subprocess.run(
-            ["uv", "run", "viper", "test_input.xlsx", "en"],
-            cwd=str(project_root),
-            capture_output=True,
-            text=True
-        )
-
-        # Verify QR generation was skipped
-        assert result.returncode == 0
-        assert "Step 3: Generating QR codes" not in result.stdout
-        qr_dir = project_root / "output" / "artifacts" / "qr_codes"
-        assert not qr_dir.exists() or len(list(qr_dir.glob("*.png"))) == 0
-
-    finally:
-        # Restore original config
-        original_config["qr"]["enabled"] = True
-        with open(config_path, "w") as f:
-            yaml.dump(original_config, f)
+    # Logic to verify step bypass or artifact omission
```
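One way the open verification stub might be filled in, sketched under assumptions: `run_pipeline` is a hypothetical in-process entry point that accepts a config path, and the `output/artifacts/qr_codes` location is carried over from the deleted E2E variant above:

```python
import yaml
from pathlib import Path

import pytest

# Hypothetical import path; substitute the project's real orchestrator.
# from pipeline.run_pipeline import run_pipeline


@pytest.mark.integration
def test_pipeline_skips_qr_when_disabled(tmp_path: Path) -> None:
    """Integration: 'qr.enabled=False' must leave no QR artifacts behind."""
    # Write the override to an isolated copy; nothing shared to restore later.
    config_path = tmp_path / "parameters.yaml"
    config_path.write_text(yaml.safe_dump({"qr": {"enabled": False}}))

    output_dir = tmp_path / "output"
    run_pipeline(config_path=config_path, output_dir=output_dir)  # hypothetical entry point

    # Contract: Step 3 never ran, so its artifact directory was never created.
    qr_dir = output_dir / "artifacts" / "qr_codes"
    assert not qr_dir.exists() or not list(qr_dir.glob("*.png"))
```

Writing the override to a throwaway copy under `tmp_path` sidesteps the deleted pattern's `try`/`finally` restore of the shared `config/parameters.yaml`, which risked polluting other tests if a run crashed mid-restore.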

### Input/Output Fixture Pattern
@@ -633,14 +576,14 @@ def test_preprocess_sorts_clients_deterministically():

## Test Coverage Goals

-- **scripts/**: >80% code coverage
-- **Pipeline orchestration**: >60% coverage (harder to test due to I/O)
+- **pipeline/**: >80% code coverage
+- **Pipeline orchestration**: >60% coverage (verified via a combination of unit and integration tests)
- **Critical path (Steps 1–6)**: >90% coverage
- **Optional features (Steps 7–9)**: >70% coverage

Run coverage reports with:
```bash
-uv run pytest --cov=scripts --cov-report=html
+uv run pytest --cov=pipeline --cov-report=html
```

View results in `htmlcov/index.html`.
2 changes: 1 addition & 1 deletion pipeline/bundle_pdfs.py
@@ -316,7 +316,7 @@ def build_client_lookup(
    clients_obj = artifact.get("clients", [])
    clients = clients_obj if isinstance(clients_obj, list) else []
    lookup: Dict[tuple[str, str], dict] = {}
-    for client in clients:  # type: ignore[var-annotated]
+    for client in clients:
        sequence = client.get("sequence")  # type: ignore[attr-defined]
        client_id = client.get("client_id")  # type: ignore[attr-defined]
        lookup[(sequence, client_id)] = client  # type: ignore[typeddict-item]
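For context on the ignores that remain: the artifact is parsed JSON, so each client entry is an untyped `dict`, and strict mypy flags the `.get` calls. A minimal sketch of the shape `build_client_lookup` assumes (field values are illustrative; the keys come from the code above):

```python
artifact = {
    "clients": [
        {"sequence": "00001", "client_id": "A123"},
        {"sequence": "00002", "client_id": "B456"},
    ]
}

# Mirrors the loop above: key each client dict on (sequence, client_id).
lookup = {(c["sequence"], c["client_id"]): c for c in artifact.get("clients", [])}
assert ("00002", "B456") in lookup
```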
25 changes: 13 additions & 12 deletions pipeline/preprocess.py
@@ -95,6 +95,7 @@

THRESHOLD = 80

+
def convert_date_string(
    date_str: str | datetime | pd.Timestamp, locale: str = "en"
) -> str | None:
@@ -172,6 +173,7 @@ def format_iso_date_for_language(iso_date: str, language: str) -> str:

    return format_date(date_obj, format="long", locale=locale)

+
def check_addresses_complete(df: pd.DataFrame) -> pd.DataFrame:
    """
    Check if address fields are complete in the DataFrame.
@@ -192,17 +194,13 @@ def check_addresses_complete(df: pd.DataFrame) -> pd.DataFrame:
    ]

    for col in address_cols:
-        df[col] = (
-            df[col]
-            .astype(str)
-            .str.strip()
-            .replace({"": pd.NA, "nan": pd.NA})
-        )
+        df[col] = df[col].astype(str).str.strip().replace({"": pd.NA, "nan": pd.NA})

    # Build combined address line
    df["ADDRESS"] = (
-        df["STREET_ADDRESS_LINE_1"].fillna("") + " " +
-        df["STREET_ADDRESS_LINE_2"].fillna("")
+        df["STREET_ADDRESS_LINE_1"].fillna("")
+        + " "
+        + df["STREET_ADDRESS_LINE_2"].fillna("")
    ).str.strip()

    df["ADDRESS"] = df["ADDRESS"].replace({"": pd.NA})
@@ -282,6 +280,7 @@ def over_16_check(date_of_birth, date_notice_delivery):

    return age >= 16

+
def configure_logging(output_dir: Path, run_id: str) -> Path:
    """Configure file logging for the preprocessing step.

@@ -387,6 +386,7 @@ def read_input(file_path: Path) -> pd.DataFrame:
        LOG.error("Failed to read %s: %s", file_path, exc)
        raise

+
def normalize(col: str) -> str:
    """Normalize formatting prior to matching."""

@@ -443,11 +443,10 @@ def map_columns(df: pd.DataFrame, required_columns=REQUIRED_COLUMNS):

    # Check each input column against required columns
    for input_col in normalized_input_cols:
-
        col_name, score, index = process.extractOne(
            query=input_col,
            choices=[normalize(req) for req in required_columns],
-            scorer=fuzz.partial_ratio
+            scorer=fuzz.partial_ratio,
        )

        # Remove column if it has a score of 0
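For context on the matcher: rapidfuzz's `process.extractOne` returns a `(choice, score, index)` triple, and `fuzz.partial_ratio` scores the best-aligned substring, so a verbose export header can still clear the module's `THRESHOLD` of 80 against its canonical name. A standalone illustration with made-up headers:

```python
from rapidfuzz import fuzz, process

choices = ["client_id", "date_of_birth", "postal_code"]
match, score, index = process.extractOne(
    query="client id number",
    choices=choices,
    scorer=fuzz.partial_ratio,
)
print(match, round(score))  # 'client_id' scores well above 80 despite the extra words
```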
@@ -460,9 +459,10 @@

        # print colname and score for debugging
        print(f"Matching '{input_col}' to '{best_match}' with score {score}")
+
    return df.rename(columns=col_map), col_map


def filter_columns(
    df: pd.DataFrame, required_columns: list[str] = REQUIRED_COLUMNS
) -> pd.DataFrame:
@@ -472,6 +472,7 @@

    return df[[col for col in df.columns if col in required_columns]]

+
def ensure_required_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize column names and validate that all required columns are present.

@@ -767,7 +768,7 @@ def build_preprocess_result(
    sorted_df["SEQUENCE"] = [f"{idx + 1:05d}" for idx in range(len(sorted_df))]

    clients: List[ClientRecord] = []
-    for row in sorted_df.itertuples(index=False):  # type: ignore[attr-defined]
+    for row in sorted_df.itertuples(index=False):
        client_id = str(row.CLIENT_ID)  # type: ignore[attr-defined]
        sequence = row.SEQUENCE  # type: ignore[attr-defined]
        dob_iso = (