dantheman-dev · Copilot · May 4, 2026 · May 4, 2026
diff --git a/README.md b/README.md
@@ -1,2 +1,70 @@
 # pricing-model-lab
+
 Python pricing analysis lab for comparing model-derived values, observed values, and deviation thresholds for decision support workflows.
+
+## Features
+
+- Load a CSV with columns: `item_id`, `observed_value`, `model_value`
+- Validate inputs (numeric checks, missing-value detection)
+- Calculate relative deviation: `(observed_value - model_value) / model_value`
+- Add a `review_flag` column based on a configurable threshold (default **3%**)
+- Output a clean CSV sorted by absolute deviation (largest first)
+- CLI interface via `argparse`
+
+## Project Structure
+
+```
+pricing-model-lab/
+├── data/
+│   └── sample_data.csv        # Synthetic sample dataset
+├── pricing_model_lab/
+│   ├── __init__.py
+│   ├── processor.py           # Core pipeline logic
+│   └── cli.py                 # Command-line interface
+├── tests/
+│   └── test_processor.py      # pytest tests
+├── pyproject.toml
+└── requirements.txt
+```
+
+## Installation
+
+```bash
+pip install -e .
+```
+
+## Usage
+
+### Command Line
+
+```bash
+pricing-model-lab INPUT_CSV OUTPUT_CSV [--threshold THRESHOLD]
+```
+
+| Argument | Description |
+|---|---|
+| `INPUT_CSV` | Path to input CSV (columns: `item_id`, `observed_value`, `model_value`) |
+| `OUTPUT_CSV` | Path for the processed output CSV |
+| `--threshold` | Threshold on the absolute relative deviation, given as a fraction rather than a percentage (default: `0.03` → 3%) |
+
+**Example:**
+
+```bash
+pricing-model-lab data/sample_data.csv output.csv --threshold 0.05
+```
+
+### Python API
+
+```python
+from pricing_model_lab.processor import process
+
+result = process("data/sample_data.csv", "output.csv", threshold=0.03)
+print(result.head())
+```
+
+## Running Tests
+
+```bash
+pip install -r requirements.txt && pytest
+```
+
diff --git a/data/sample_data.csv b/data/sample_data.csv
@@ -0,0 +1,21 @@
+item_id,observed_value,model_value
+ITEM_001,102.50,100.00
+ITEM_002,98.00,100.00
+ITEM_003,105.00,100.00
+ITEM_004,107.50,100.00
+ITEM_005,95.00,100.00
+ITEM_006,110.00,100.00
+ITEM_007,99.50,100.00
+ITEM_008,88.00,100.00
+ITEM_009,101.00,100.00
+ITEM_010,115.00,100.00
+ITEM_011,52.00,50.00
+ITEM_012,48.50,50.00
+ITEM_013,53.50,50.00
+ITEM_014,57.00,50.00
+ITEM_015,46.00,50.00
+ITEM_016,200.00,195.00
+ITEM_017,185.00,195.00
+ITEM_018,203.00,195.00
+ITEM_019,210.00,195.00
+ITEM_020,180.00,195.00
diff --git a/pricing_model_lab/__init__.py b/pricing_model_lab/__init__.py
@@ -0,0 +1,3 @@
+"""pricing-model-lab: compare observed vs. model values and flag deviations."""
+
+__version__ = "0.1.0"
diff --git a/pricing_model_lab/cli.py b/pricing_model_lab/cli.py
@@ -0,0 +1,62 @@
+"""Command-line interface for the pricing model lab."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+from pricing_model_lab.processor import DEFAULT_THRESHOLD, process
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Create the argument parser for the ``pricing-model-lab`` command."""
+    summary = (
+        "Compare observed vs. model values, compute deviations, "
+        "and flag items that exceed a configurable threshold."
+    )
+    threshold_help = (
+        "Absolute deviation threshold for the review flag "
+        f"(default: {DEFAULT_THRESHOLD * 100:.0f}%%)."
+    )
+    cli = argparse.ArgumentParser(prog="pricing-model-lab", description=summary)
+    cli.add_argument(
+        "input",
+        metavar="INPUT_CSV",
+        help="Path to the input CSV file (columns: item_id, observed_value, model_value).",
+    )
+    cli.add_argument(
+        "output",
+        metavar="OUTPUT_CSV",
+        help="Path where the processed output CSV will be written.",
+    )
+    cli.add_argument(
+        "--threshold",
+        type=float,
+        default=DEFAULT_THRESHOLD,
+        metavar="THRESHOLD",
+        help=threshold_help,
+    )
+    return cli
+
+
+def main(argv: list[str] | None = None) -> None:
+    """Run the CLI: print a summary, or report the error and exit with 1."""
+    args = build_parser().parse_args(argv)
+
+    try:
+        result = process(args.input, args.output, threshold=args.threshold)
+    except (OSError, ValueError) as exc:
+        # OSError (parent of FileNotFoundError) also covers unwritable outputs.
+        print(f"Error: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    flagged = result["review_flag"].sum()
+    print(
+        f"Processed {len(result)} items. "
+        f"{flagged} flagged (|deviation| > {args.threshold:.2%}). "
+        f"Output written to: {args.output}"
+    )
+
+
+if __name__ == "__main__":  # executed as a script rather than imported
+    main()
diff --git a/pricing_model_lab/processor.py b/pricing_model_lab/processor.py
@@ -0,0 +1,121 @@
+"""Core processing logic for the pricing model lab."""
+
+from __future__ import annotations
+
+import pandas as pd
+
+REQUIRED_COLUMNS = {"item_id", "observed_value", "model_value"}  # mandatory CSV headers
+DEFAULT_THRESHOLD = 0.03  # 3%; default |deviation| cut-off for review_flag
+
+
+def load_csv(filepath: str) -> pd.DataFrame:
+    """Read *filepath* into a DataFrame and check the expected columns exist.
+
+    Raises
+    ------
+    FileNotFoundError
+        If *filepath* does not exist.
+    ValueError
+        If any column in ``REQUIRED_COLUMNS`` is absent.
+    """
+    frame = pd.read_csv(filepath)
+
+    absent = sorted(REQUIRED_COLUMNS.difference(frame.columns))
+    if absent:
+        raise ValueError(f"CSV is missing required columns: {absent}")
+
+    return frame
+
+
+def validate(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a checked copy of *df* with the value columns made numeric.
+
+    Raises
+    ------
+    ValueError
+        If *observed_value* or *model_value* holds missing, non-numeric or
+        infinite data, if *model_value* contains a zero (the deviation would
+        be undefined), or if *item_id* has missing entries.
+    """
+    df = df.copy()
+    infinity = float("inf")
+    for col in ("observed_value", "model_value"):
+        # Unparseable cells become NaN so the missing-value check sees them.
+        df[col] = pd.to_numeric(df[col], errors="coerce")
+        if df[col].isna().any():
+            raise ValueError(
+                f"Column '{col}' contains missing or non-numeric values."
+            )
+        # "inf" parses as a number but makes the deviation infinite or NaN.
+        if ((df[col] == infinity) | (df[col] == -infinity)).any():
+            raise ValueError(f"Column '{col}' contains infinite values.")
+
+    if (df["model_value"] == 0).any():
+        raise ValueError(
+            "Column 'model_value' contains zero values; deviation is undefined."
+        )
+    if df["item_id"].isna().any():
+        raise ValueError("Column 'item_id' contains missing values.")
+    return df
+
+
+def calculate_deviation(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of *df* with a relative *deviation* column appended."""
+    observed, model = df["observed_value"], df["model_value"]
+    # Relative deviation: (observed_value - model_value) / model_value.
+    return df.assign(deviation=(observed - model) / model)
+
+
+def add_review_flag(
+    df: pd.DataFrame, threshold: float = DEFAULT_THRESHOLD
+) -> pd.DataFrame:
+    """Return a copy of *df* with a boolean *review_flag* column.
+
+    A row is flagged when ``abs(deviation)`` is strictly above *threshold*.
+    Raises ``ValueError`` if *threshold* is negative or NaN.
+    """
+    # "not >= 0" (rather than "< 0") also rejects NaN, which would otherwise
+    # compare False against every row and silently flag nothing.
+    if not threshold >= 0:
+        raise ValueError("threshold must be non-negative.")
+    return df.assign(review_flag=df["deviation"].abs() > threshold)
+
+
+def sort_by_abs_deviation(df: pd.DataFrame) -> pd.DataFrame:
+    """Return *df* sorted by absolute deviation, largest first, index reset."""
+    # A stable sort keeps tied rows in input order, so output is reproducible;
+    # ``key`` avoids materialising a temporary helper column.
+    ordered = df.sort_values(
+        "deviation", key=lambda col: col.abs(), ascending=False, kind="stable"
+    )
+    return ordered.reset_index(drop=True)
+
+
+def process(
+    input_path: str,
+    output_path: str,
+    threshold: float = DEFAULT_THRESHOLD,
+) -> pd.DataFrame:
+    """Load, validate, score, flag and sort a CSV, then save the result.
+
+    Parameters
+    ----------
+    input_path:
+        Location of the CSV to read.
+    output_path:
+        Location the processed CSV is written to.
+    threshold:
+        Cut-off on the absolute deviation for the review flag (default 3 %).
+
+    Returns
+    -------
+    pd.DataFrame
+        The same rows that were written to *output_path*.
+    """
+    # Every stage takes and returns a DataFrame, so the steps chain with pipe().
+    df = load_csv(input_path).pipe(validate).pipe(calculate_deviation)
+    df = df.pipe(add_review_flag, threshold=threshold).pipe(sort_by_abs_deviation)
+
+    # Persist without the positional index column.
+    df.to_csv(output_path, index=False)
+    return df
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pricing-model-lab"
+version = "0.1.0"
+description = "Compare observed vs. model values, compute deviations, and flag outliers."
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT" }
+dependencies = [
+    "pandas>=2.0",
+]
+
+[project.scripts]
+pricing-model-lab = "pricing_model_lab.cli:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["pricing_model_lab*"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+pandas>=2.0
+pytest>=7.0
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,3 @@
+"""pricing-model-lab: compare observed vs. model values and flag deviations."""
+
+__version__ = "0.1.0"