Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,70 @@
# pricing-model-lab

Python pricing analysis lab that compares observed values against model-derived values and flags items whose relative deviation exceeds a configurable threshold, for use in decision-support workflows.

## Features

- Load a CSV with columns: `item_id`, `observed_value`, `model_value`
- Validate inputs (numeric checks, missing-value detection, rejection of zero `model_value`)
- Calculate relative deviation: `(observed_value - model_value) / model_value`
- Add a `review_flag` column based on a configurable threshold (default **3%**)
- Output a clean CSV sorted by absolute deviation (largest first)
- CLI interface via `argparse`

## Project Structure

```
pricing-model-lab/
├── data/
│ └── sample_data.csv # Synthetic sample dataset
├── pricing_model_lab/
│ ├── __init__.py
│ ├── processor.py # Core pipeline logic
│ └── cli.py # Command-line interface
├── tests/
│ └── test_processor.py # pytest tests
├── pyproject.toml
└── requirements.txt
```

## Installation

```bash
pip install -e .
```

## Usage

### Command Line

```bash
pricing-model-lab INPUT_CSV OUTPUT_CSV [--threshold THRESHOLD]
```

| Argument | Description |
|---|---|
| `INPUT_CSV` | Path to input CSV (columns: `item_id`, `observed_value`, `model_value`) |
| `OUTPUT_CSV` | Path for the processed output CSV |
| `--threshold` | Absolute deviation threshold for the review flag (default: `0.03` → 3%) |

**Example:**

```bash
pricing-model-lab data/sample_data.csv output.csv --threshold 0.05
```

### Python API

```python
from pricing_model_lab.processor import process

result = process("data/sample_data.csv", "output.csv", threshold=0.03)
print(result.head())
```

## Running Tests

```bash
pytest
```

21 changes: 21 additions & 0 deletions data/sample_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
item_id,observed_value,model_value
ITEM_001,102.50,100.00
ITEM_002,98.00,100.00
ITEM_003,105.00,100.00
ITEM_004,107.50,100.00
ITEM_005,95.00,100.00
ITEM_006,110.00,100.00
ITEM_007,99.50,100.00
ITEM_008,88.00,100.00
ITEM_009,101.00,100.00
ITEM_010,115.00,100.00
ITEM_011,52.00,50.00
ITEM_012,48.50,50.00
ITEM_013,53.50,50.00
ITEM_014,57.00,50.00
ITEM_015,46.00,50.00
ITEM_016,200.00,195.00
ITEM_017,185.00,195.00
ITEM_018,203.00,195.00
ITEM_019,210.00,195.00
ITEM_020,180.00,195.00
3 changes: 3 additions & 0 deletions pricing_model_lab/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""pricing-model-lab: compare observed vs. model values and flag deviations."""

# Version string exposed by the package at import time.
__version__ = "0.1.0"
62 changes: 62 additions & 0 deletions pricing_model_lab/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Command-line interface for the pricing model lab."""

from __future__ import annotations

import argparse
import sys

from pricing_model_lab.processor import DEFAULT_THRESHOLD, process


def build_parser() -> argparse.ArgumentParser:
    """Construct the argument parser for the ``pricing-model-lab`` command.

    The parser accepts two positional arguments (input and output CSV paths)
    and an optional ``--threshold`` float that defaults to
    ``DEFAULT_THRESHOLD``.
    """
    description = (
        "Compare observed vs. model values, compute deviations, "
        "and flag items that exceed a configurable threshold."
    )
    # argparse %-formats help strings, hence the doubled percent sign.
    threshold_help = (
        "Absolute deviation threshold for the review flag "
        f"(default: {DEFAULT_THRESHOLD * 100:.0f}%%)."
    )
    positionals = (
        (
            "input",
            "INPUT_CSV",
            "Path to the input CSV file (columns: item_id, observed_value, model_value).",
        ),
        (
            "output",
            "OUTPUT_CSV",
            "Path where the processed output CSV will be written.",
        ),
    )

    cli = argparse.ArgumentParser(prog="pricing-model-lab", description=description)
    for dest, metavar, help_text in positionals:
        cli.add_argument(dest, metavar=metavar, help=help_text)
    cli.add_argument(
        "--threshold",
        type=float,
        default=DEFAULT_THRESHOLD,
        metavar="THRESHOLD",
        help=threshold_help,
    )
    return cli


def main(argv: list[str] | None = None) -> None:
    """Run the command-line interface.

    Parameters
    ----------
    argv:
        Argument list to parse. ``None`` lets argparse fall back to the
        process's own command-line arguments.

    Notes
    -----
    On failure the error message is printed to stderr and the process exits
    with status 1. On success a one-line summary is printed to stdout.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    try:
        result = process(args.input, args.output, threshold=args.threshold)
    except (OSError, ValueError) as exc:
        # OSError is the parent of FileNotFoundError and also covers
        # permission errors, directories passed as files, and output paths
        # whose parent directory does not exist; all of them should produce
        # the same clean message rather than a traceback.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    flagged = result["review_flag"].sum()
    total = len(result)
    print(
        f"Processed {total} items. "
        f"{flagged} flagged (|deviation| > {args.threshold:.2%}). "
        f"Output written to: {args.output}"
    )


if __name__ == "__main__":
    main()
121 changes: 121 additions & 0 deletions pricing_model_lab/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Core processing logic for the pricing model lab."""

from __future__ import annotations

import pandas as pd

# Column names that load_csv() requires to be present in the input CSV.
REQUIRED_COLUMNS = {"item_id", "observed_value", "model_value"}
# Default cut-off for |deviation| above which a row gets review_flag=True.
DEFAULT_THRESHOLD = 0.03  # 3%


def load_csv(filepath: str) -> pd.DataFrame:
    """Read *filepath* into a DataFrame and check the required columns exist.

    Raises
    ------
    FileNotFoundError
        If *filepath* does not exist.
    ValueError
        If any column named in ``REQUIRED_COLUMNS`` is absent.
    """
    frame = pd.read_csv(filepath)

    # Sorted so the error message is deterministic regardless of set order.
    absent = sorted(REQUIRED_COLUMNS.difference(frame.columns))
    if absent:
        raise ValueError(f"CSV is missing required columns: {absent}")
    return frame


def validate(df: pd.DataFrame) -> pd.DataFrame:
    """Validate the value columns and *item_id* of *df*.

    Returns a copy of *df* in which *observed_value* and *model_value* have
    been converted with ``pd.to_numeric``; the input frame is not modified.

    Raises
    ------
    ValueError
        If *observed_value* or *model_value* contains missing, non-numeric
        or infinite entries, if *model_value* contains zero, or if
        *item_id* contains missing values.
    """
    df = df.copy()
    infinities = [float("inf"), float("-inf")]

    for col in ("observed_value", "model_value"):
        # Unparseable entries become NaN so one isna() check catches both
        # missing and non-numeric data.
        df[col] = pd.to_numeric(df[col], errors="coerce")

        if df[col].isna().any():
            raise ValueError(
                f"Column '{col}' contains missing or non-numeric values."
            )

        # "inf" parses as a valid float, but it turns the deviation into
        # NaN or inf, which would silently escape flagging and sorting.
        if df[col].isin(infinities).any():
            raise ValueError(f"Column '{col}' contains infinite values.")

    if (df["model_value"] == 0).any():
        raise ValueError(
            "Column 'model_value' contains zero values; deviation is undefined."
        )

    if df["item_id"].isna().any():
        raise ValueError("Column 'item_id' contains missing values.")

    return df


def calculate_deviation(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a relative *deviation* column.

    The deviation is ``(observed_value - model_value) / model_value``; the
    input frame is left untouched.
    """
    model = df["model_value"]
    difference = df["observed_value"] - model
    return df.assign(deviation=difference / model)


def add_review_flag(
    df: pd.DataFrame, threshold: float = DEFAULT_THRESHOLD
) -> pd.DataFrame:
    """Return a copy of *df* with a boolean *review_flag* column.

    A row is flagged when the absolute value of its *deviation* strictly
    exceeds *threshold*.

    Raises
    ------
    ValueError
        If *threshold* is negative or NaN.
    """
    # NaN is the only value unequal to itself. It compares False against
    # everything, so it would slip past the sign check below and then
    # silently flag no rows at all.
    if threshold != threshold:
        raise ValueError("threshold must not be NaN.")
    if threshold < 0:
        raise ValueError("threshold must be non-negative.")

    df = df.copy()
    df["review_flag"] = df["deviation"].abs() > threshold
    return df


def sort_by_abs_deviation(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* sorted by absolute deviation, largest first.

    Rows with equal absolute deviation keep their input order, and the
    result carries a fresh 0..n-1 index. The input frame is not modified.
    """
    # Sorting through ``key`` avoids a scratch column, which would overwrite
    # and then drop any caller column that happened to share its name.
    # The stable sort makes the order of ties deterministic.
    return df.sort_values(
        "deviation",
        key=lambda values: values.abs(),
        ascending=False,
        kind="stable",
        ignore_index=True,
    )


def process(
    input_path: str,
    output_path: str,
    threshold: float = DEFAULT_THRESHOLD,
) -> pd.DataFrame:
    """Execute the whole pipeline and save the result as a CSV.

    The stages run in order: load, validate, compute deviation, add the
    review flag, sort by absolute deviation.

    Parameters
    ----------
    input_path:
        Path to the input CSV file.
    output_path:
        Path where the output CSV will be written.
    threshold:
        Absolute deviation threshold for the review flag (default 3 %).

    Returns
    -------
    pd.DataFrame
        The processed DataFrame that was written to *output_path*.
    """
    result = (
        load_csv(input_path)
        .pipe(validate)
        .pipe(calculate_deviation)
        .pipe(add_review_flag, threshold=threshold)
        .pipe(sort_by_abs_deviation)
    )
    # The index is a plain row counter after sorting, so it is not written.
    result.to_csv(output_path, index=False)
    return result
24 changes: 24 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[build-system]
requires = ["setuptools>=68", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pricing-model-lab"
version = "0.1.0"
description = "Compare observed vs. model values, compute deviations, and flag outliers."
readme = "README.md"
requires-python = ">=3.9"
license = { text = "MIT" }
dependencies = [
"pandas>=2.0",
]

[project.scripts]
pricing-model-lab = "pricing_model_lab.cli:main"

[tool.setuptools.packages.find]
where = ["."]
include = ["pricing_model_lab*"]

[tool.pytest.ini_options]
testpaths = ["tests"]
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pandas>=2.0
pytest>=7.0
Empty file added tests/__init__.py
Empty file.
Loading