diff --git a/README.md b/README.md
index b6ab1e4..b56766a 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,70 @@
 # pricing-model-lab
+
 Python pricing analysis lab for comparing model-derived values, observed values, and deviation thresholds for decision support workflows.
+
+## Features
+
+- Load a CSV with columns: `item_id`, `observed_value`, `model_value`
+- Validate inputs (numeric checks, missing-value detection, rejection of zero `model_value`)
+- Calculate relative deviation: `(observed_value - model_value) / model_value`
+- Add a boolean `review_flag` column, set when the absolute deviation exceeds a configurable threshold (default **3%**, passed as the fraction `0.03`)
+- Output a clean CSV sorted by absolute deviation (largest first)
+- CLI interface via `argparse`
+
+## Project Structure
+
+```
+pricing-model-lab/
+├── data/
+│   └── sample_data.csv        # Synthetic sample dataset
+├── pricing_model_lab/
+│   ├── __init__.py
+│   ├── processor.py           # Core pipeline logic
+│   └── cli.py                 # Command-line interface
+├── tests/
+│   └── test_processor.py      # pytest tests
+├── pyproject.toml
+└── requirements.txt
+```
+
+## Installation
+
+```bash
+pip install -e .
+```
+
+## Usage
+
+### Command Line
+
+```bash
+pricing-model-lab INPUT_CSV OUTPUT_CSV [--threshold THRESHOLD]
+```
+
+| Argument | Description |
+|---|---|
+| `INPUT_CSV` | Path to input CSV (columns: `item_id`, `observed_value`, `model_value`) |
+| `OUTPUT_CSV` | Path for the processed output CSV |
+| `--threshold` | Threshold on the absolute relative deviation, given as a fraction (default: `0.03` → 3%); rows strictly above it get the review flag |
+
+**Example:**
+
+```bash
+pricing-model-lab data/sample_data.csv output.csv --threshold 0.05
+```
+
+### Python API
+
+```python
+from pricing_model_lab.processor import process
+
+result = process("data/sample_data.csv", "output.csv", threshold=0.03)
+print(result.head())
+```
+
+## Running Tests
+
+```bash
+pytest
+```
+
diff --git a/data/sample_data.csv b/data/sample_data.csv
new file mode 100644
index 0000000..e29555b
--- /dev/null
+++ b/data/sample_data.csv
@@ -0,0 +1,21 @@
+item_id,observed_value,model_value
+ITEM_001,102.50,100.00
+ITEM_002,98.00,100.00
+ITEM_003,105.00,100.00
+ITEM_004,107.50,100.00
+ITEM_005,95.00,100.00
+ITEM_006,110.00,100.00
+ITEM_007,99.50,100.00
+ITEM_008,88.00,100.00
+ITEM_009,101.00,100.00
+ITEM_010,115.00,100.00
+ITEM_011,52.00,50.00
+ITEM_012,48.50,50.00
+ITEM_013,53.50,50.00
+ITEM_014,57.00,50.00
+ITEM_015,46.00,50.00
+ITEM_016,200.00,195.00
+ITEM_017,185.00,195.00
+ITEM_018,203.00,195.00
+ITEM_019,210.00,195.00
+ITEM_020,180.00,195.00
diff --git a/pricing_model_lab/__init__.py b/pricing_model_lab/__init__.py
new file mode 100644
index 0000000..84d01a2
--- /dev/null
+++ b/pricing_model_lab/__init__.py
@@ -0,0 +1,3 @@
+"""pricing-model-lab: compare observed vs. model values and flag deviations."""
+
+__version__ = "0.1.0"
diff --git a/pricing_model_lab/cli.py b/pricing_model_lab/cli.py
new file mode 100644
index 0000000..aa5c8cd
--- /dev/null
+++ b/pricing_model_lab/cli.py
@@ -0,0 +1,62 @@
+"""Command-line interface for the pricing model lab."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+
+from pricing_model_lab.processor import DEFAULT_THRESHOLD, process
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the ``pricing-model-lab`` argument parser.
+
+    ``--threshold`` is a fraction (``0.05`` = 5 %), which its help spells out.
+    """
+    parser = argparse.ArgumentParser(
+        prog="pricing-model-lab",
+        description="Compare observed vs. model values, compute deviations, "
+        "and flag items that exceed a configurable threshold.",
+    )
+    parser.add_argument(
+        "input", metavar="INPUT_CSV",
+        help="Path to the input CSV file (columns: item_id, observed_value, model_value).",
+    )
+    parser.add_argument(
+        "output", metavar="OUTPUT_CSV",
+        help="Path where the processed output CSV will be written.",
+    )
+    # ``%%`` below is argparse's escape for a literal percent sign.
+    threshold_help = (
+        "Absolute relative-deviation threshold for the review flag, as a "
+        f"fraction, e.g. 0.05 for 5%% (default: {DEFAULT_THRESHOLD:g} = "
+        f"{DEFAULT_THRESHOLD * 100:.0f}%%)."
+    )
+    parser.add_argument(
+        "--threshold", type=float, default=DEFAULT_THRESHOLD,
+        metavar="THRESHOLD", help=threshold_help,
+    )
+    return parser
+
+
+def main(argv: list[str] | None = None) -> None:
+    """Run the CLI; ``ValueError``/``OSError`` print to stderr and exit 1.
+
+    *argv* is the argument list; ``None`` lets argparse read ``sys.argv``.
+    """
+    args = build_parser().parse_args(argv)
+    try:
+        result = process(args.input, args.output, threshold=args.threshold)
+    except (OSError, ValueError) as exc:
+        print(f"Error: {exc}", file=sys.stderr)
+        sys.exit(1)
+    flagged = result["review_flag"].sum()
+    print(
+        f"Processed {len(result)} items. "
+        f"{flagged} flagged (|deviation| > {args.threshold:.2%}). "
+        f"Output written to: {args.output}"
+    )
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
diff --git a/pricing_model_lab/processor.py b/pricing_model_lab/processor.py
new file mode 100644
index 0000000..c2f8c6a
--- /dev/null
+++ b/pricing_model_lab/processor.py
@@ -0,0 +1,121 @@
+"""Core processing logic for the pricing model lab."""
+
+from __future__ import annotations
+
+import pandas as pd
+
+REQUIRED_COLUMNS = {"item_id", "observed_value", "model_value"}  # load_csv() rejects a CSV lacking any of these
+DEFAULT_THRESHOLD = 0.03  # 3%, as a fraction; rows with |deviation| above it are flagged
+
+
+def load_csv(filepath: str) -> pd.DataFrame:
+    """Read *filepath* into a DataFrame and check the required columns exist.
+
+    Raises
+    ------
+    FileNotFoundError
+        If *filepath* does not exist.
+    ValueError
+        If any column named in ``REQUIRED_COLUMNS`` is absent.
+    """
+    frame = pd.read_csv(filepath)
+
+    absent = sorted(REQUIRED_COLUMNS.difference(frame.columns))
+    if absent:
+        raise ValueError(f"CSV is missing required columns: {absent}")
+
+    return frame
+
+
+def validate(df: pd.DataFrame) -> pd.DataFrame:
+    """Check *df* and return a cleaned copy with the value columns as float.
+
+    *observed_value* and *model_value* are parsed with ``pd.to_numeric`` and
+    cast to ``float`` (integer input included).  *df* itself is not modified.
+
+    Raises
+    ------
+    ValueError
+        If either value column holds missing, non-numeric or infinite
+        entries, if *model_value* contains zero, or if *item_id* has
+        missing values.
+    """
+    df = df.copy()
+    for col in ("observed_value", "model_value"):
+        # Unparseable cells become NaN, so one isna() check covers both cases.
+        df[col] = pd.to_numeric(df[col], errors="coerce").astype(float)
+        if df[col].isna().any():
+            raise ValueError(f"Column '{col}' contains missing or non-numeric values.")
+        # +/-inf parses as a number but would yield inf or NaN deviations.
+        if (df[col].abs() == float("inf")).any():
+            raise ValueError(f"Column '{col}' contains infinite values.")
+
+    if (df["model_value"] == 0).any():
+        raise ValueError("Column 'model_value' contains zero values; deviation is undefined.")
+
+    if df["item_id"].isna().any():
+        raise ValueError("Column 'item_id' contains missing values.")
+
+    return df
+
+
+def calculate_deviation(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of *df* with *deviation* = (observed - model) / model added."""
+    model = df["model_value"]
+    relative_gap = df["observed_value"].sub(model).div(model)
+    return df.assign(deviation=relative_gap)
+
+
+def add_review_flag(
+    df: pd.DataFrame, threshold: float = DEFAULT_THRESHOLD
+) -> pd.DataFrame:
+    """Return a copy of *df* with a boolean *review_flag* column.
+
+    A row is flagged when ``abs(deviation)`` is strictly greater than
+    *threshold* (a fraction).  Raises ``ValueError`` for a negative or NaN
+    threshold; NaN would otherwise silently flag nothing.
+    """
+    if not threshold >= 0:  # written this way so NaN is rejected as well
+        raise ValueError("threshold must be non-negative.")
+
+    return df.assign(review_flag=df["deviation"].abs() > threshold)
+
+
+def sort_by_abs_deviation(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a re-indexed copy of *df* ordered by ``abs(deviation)``, largest first.
+
+    Sorting with a key (no scratch column) leaves a caller's ``_abs_deviation``
+    column intact; the stable sort keeps tied rows in their input order.
+    """
+    ordered = df.sort_values("deviation", key=abs, ascending=False, kind="stable")
+    return ordered.reset_index(drop=True)
+
+
+def process(
+    input_path: str,
+    output_path: str,
+    threshold: float = DEFAULT_THRESHOLD,
+) -> pd.DataFrame:
+    """Run the whole pipeline on *input_path* and save the result.
+
+    Steps, in order: load, validate, compute deviation, flag, sort.
+
+    Parameters
+    ----------
+    input_path:
+        CSV file to read.
+    output_path:
+        Destination of the processed CSV (written without the index).
+    threshold:
+        Fraction that ``abs(deviation)`` must exceed for a row to be flagged.
+
+    Returns
+    -------
+    pd.DataFrame
+        The frame that was written to *output_path*.
+    """
+    frame = load_csv(input_path).pipe(validate).pipe(calculate_deviation)
+    frame = frame.pipe(add_review_flag, threshold=threshold)
+    frame = frame.pipe(sort_by_abs_deviation)
+    frame.to_csv(output_path, index=False)
+    return frame
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..6c13712
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pricing-model-lab"
+version = "0.1.0"
+description = "Compare observed vs. model values, compute deviations, and flag outliers."
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT" }
+dependencies = [
+    "pandas>=2.0",
+]
+
+[project.scripts]
+pricing-model-lab = "pricing_model_lab.cli:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["pricing_model_lab*"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3ceaf54
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+pandas>=2.0
+pytest>=7.0
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_processor.py b/tests/test_processor.py
new file mode 100644
index 0000000..414349e
--- /dev/null
+++ b/tests/test_processor.py
@@ -0,0 +1,202 @@
+"""Tests for pricing_model_lab.processor."""
+
+from __future__ import annotations
+
+import io
+import textwrap
+
+import pandas as pd
+import pytest
+
+from pricing_model_lab.processor import (
+    add_review_flag,
+    calculate_deviation,
+    load_csv,
+    process,
+    sort_by_abs_deviation,
+    validate,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_df(
+    item_ids=("A", "B", "C"),
+    observed=(102.0, 98.0, 110.0),
+    model=(100.0, 100.0, 100.0),
+) -> pd.DataFrame:
+    """Build a small input frame; each argument supplies one column's values."""
+    columns = {
+        "item_id": item_ids,
+        "observed_value": observed,
+        "model_value": model,
+    }
+    return pd.DataFrame({name: list(values) for name, values in columns.items()})
+
+
+# ---------------------------------------------------------------------------
+# load_csv
+# ---------------------------------------------------------------------------
+
+def test_load_csv_success(tmp_path):
+    """A well-formed CSV loads with its header order and row count intact."""
+    csv_file = tmp_path / "data.csv"
+    csv_file.write_text("item_id,observed_value,model_value\nA,100,100\n")
+    frame = load_csv(str(csv_file))
+    assert len(frame) == 1
+    assert list(frame.columns) == ["item_id", "observed_value", "model_value"]
+
+
+def test_load_csv_missing_columns(tmp_path):
+    """A CSV lacking ``model_value`` is rejected with a descriptive ValueError."""
+    (tmp_path / "bad.csv").write_text("item_id,observed_value\nA,100\n")
+    with pytest.raises(ValueError, match="missing required columns"):
+        load_csv(str(tmp_path / "bad.csv"))
+
+
+def test_load_csv_file_not_found():
+    """A path that does not exist surfaces as FileNotFoundError."""
+    pytest.raises(FileNotFoundError, load_csv, "/nonexistent/path/file.csv")
+
+
+# ---------------------------------------------------------------------------
+# validate
+# ---------------------------------------------------------------------------
+
+def test_validate_success():
+    """Clean input passes validation and both value columns come back as float."""
+    cleaned = validate(_make_df())
+    for column in ("observed_value", "model_value"):
+        assert cleaned[column].dtype == float
+
+
+def test_validate_non_numeric_observed():
+    """Text in *observed_value* is rejected, and the error names that column."""
+    with pytest.raises(ValueError, match="observed_value"):
+        validate(_make_df(observed=("bad", 98.0, 110.0)))
+
+
+def test_validate_non_numeric_model():
+    """Text in *model_value* is rejected, and the error names that column."""
+    with pytest.raises(ValueError, match="model_value"):
+        validate(_make_df(model=(100.0, "n/a", 100.0)))
+
+
+def test_validate_missing_observed():
+    """A missing *observed_value* entry is rejected, naming that column."""
+    with pytest.raises(ValueError, match="observed_value"):
+        validate(_make_df(observed=(None, 98.0, 110.0)))
+
+
+def test_validate_zero_model_value():
+    """A zero *model_value* is rejected because the deviation would divide by it."""
+    with pytest.raises(ValueError, match="zero values"):
+        validate(_make_df(model=(0.0, 100.0, 100.0)))
+
+
+def test_validate_missing_item_id():
+    """A missing *item_id* is rejected, and the error names that column."""
+    with pytest.raises(ValueError, match="item_id"):
+        validate(_make_df(item_ids=(None, "B", "C")))
+
+
+# ---------------------------------------------------------------------------
+# calculate_deviation
+# ---------------------------------------------------------------------------
+
+def test_calculate_deviation_values():
+    """Deviations of +10 %, -10 % and 0 % are computed relative to the model value."""
+    frame = _make_df(observed=(110.0, 90.0, 100.0), model=(100.0, 100.0, 100.0))
+    computed = calculate_deviation(frame)
+    assert "deviation" in computed.columns
+    for row, expected in enumerate((0.10, -0.10, 0.00)):
+        assert computed.loc[row, "deviation"] == pytest.approx(expected)
+
+
+def test_calculate_deviation_does_not_mutate():
+    source = _make_df()  # the caller's frame must keep exactly its own columns
+    columns_before = source.columns.tolist()
+    calculate_deviation(source)
+    assert source.columns.tolist() == columns_before
+
+
+# ---------------------------------------------------------------------------
+# add_review_flag
+# ---------------------------------------------------------------------------
+
+def test_add_review_flag_default_threshold():
+    """With the default 3 % threshold, +/-3.1 % rows are flagged and 0 % is not."""
+    frame = _make_df(observed=(103.1, 96.9, 100.0), model=(100.0, 100.0, 100.0))
+    flags = add_review_flag(calculate_deviation(frame))["review_flag"]
+    # deviations: 0.031, -0.031, 0.0
+    assert flags[0]
+    assert flags[1]
+    assert not flags[2]
+
+
+def test_add_review_flag_custom_threshold():
+    """A 5 % deviation stays unflagged once the threshold is raised to 10 %."""
+    frame = _make_df(observed=(105.0, 100.0, 100.0), model=(100.0, 100.0, 100.0))
+    deviations = calculate_deviation(frame)
+    flagged = add_review_flag(deviations, threshold=0.10)
+    assert not flagged.loc[0, "review_flag"]
+
+
+def test_add_review_flag_negative_threshold():
+    """A threshold below zero is refused with a ValueError."""
+    with pytest.raises(ValueError, match="non-negative"):
+        add_review_flag(calculate_deviation(_make_df()), threshold=-0.01)
+
+
+# ---------------------------------------------------------------------------
+# sort_by_abs_deviation
+# ---------------------------------------------------------------------------
+
+def test_sort_by_abs_deviation_order():
+    """Rows come back with non-increasing absolute deviation."""
+    frame = _make_df(observed=(101.0, 120.0, 95.0), model=(100.0, 100.0, 100.0))
+    ranked = sort_by_abs_deviation(calculate_deviation(frame))
+    magnitudes = [abs(value) for value in ranked["deviation"]]
+    assert magnitudes == sorted(magnitudes, reverse=True)
+
+
+def test_sort_by_abs_deviation_no_extra_columns():
+    """Sorting must not leak a helper column into the result."""
+    ranked = sort_by_abs_deviation(calculate_deviation(_make_df()))
+    assert "_abs_deviation" not in ranked.columns
+
+
+# ---------------------------------------------------------------------------
+# process (end-to-end)
+# ---------------------------------------------------------------------------
+
+def test_process_end_to_end(tmp_path):
+    """The full pipeline writes a sorted, flagged CSV and returns the same rows."""
+    input_path = tmp_path / "input.csv"
+    output_path = tmp_path / "output.csv"
+    input_path.write_text(textwrap.dedent("""\
+        item_id,observed_value,model_value
+        X,110,100
+        Y,100,100
+        Z,85,100
+    """))
+    expected_columns = [
+        "item_id", "observed_value", "model_value", "deviation", "review_flag"
+    ]
+
+    result = process(str(input_path), str(output_path), threshold=0.03)
+
+    assert output_path.exists()
+    written = pd.read_csv(str(output_path))
+    assert list(written.columns) == expected_columns
+    # Z has largest abs deviation (−15%), X second (10%), Y zero
+    for position, item in enumerate(("Z", "X", "Y")):
+        assert written.loc[position, "item_id"] == item
+    # Z and X should be flagged; Y should not
+    assert written.loc[0, "review_flag"]
+    assert written.loc[1, "review_flag"]
+    assert not written.loc[2, "review_flag"]
+    # row count matches
+    assert len(result) == 3