From a5df4c6db3ce1b1940c118f8d6c4818084ab2df1 Mon Sep 17 00:00:00 2001
From: "Sheriff .n Ibrahim" <ibrahimsheriff999@gmail.com>
Date: Mon, 9 Mar 2026 14:31:36 +0100
Subject: [PATCH 1/2] Add Week 8 exercise for IbrahimSheriff: SpecialistAgent,
 FrontierAgent, ChromaDB, ensemble evaluation, and Gradio UI

---
 .../IbrahimSheriff/pricer_beat_39.ipynb       | 432 ++++++++++++++++++
 .../IbrahimSheriff/week8_exercise.ipynb       | 348 ++++++++++++++
 2 files changed, 780 insertions(+)
 create mode 100644 week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb
 create mode 100644 week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb

diff --git a/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb b/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb
new file mode 100644
index 0000000000..210c2b5617
--- /dev/null
+++ b/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb
@@ -0,0 +1,432 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Week 7: Beat the 39 — Price prediction (IbrahimSheriff2)\n",
+        "\n",
+        "**Goal:** Get average absolute error **below 39.85** (ideally into the lower 30s).  \n",
+        "**Metric:** Same as instructor — average $ error on 250 test samples.  \n",
+        "**HF user:** `sheriff`"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "%pip install -q pandas scikit-learn datasets transformers torch peft bitsandbytes trl accelerate matplotlib python-dotenv"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import re\n",
+        "import math\n",
+        "from pathlib import Path\n",
+        "from datetime import datetime\n",
+        "import torch\n",
+        "import numpy as np\n",
+        "from tqdm import tqdm\n",
+        "from dotenv import load_dotenv\n",
+        "from datasets import load_dataset, Dataset\n",
+        "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n",
+        "from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model, PeftModel\n",
+        "from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "load_dotenv(override=True)\n",
+        "set_seed(42)\n",
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Config (hyperparameters to tune)\n",
+        "\n",
+        "**Instructor baseline:** 39.85. Try varying these to get into the lower 30s.  \n",
+        "**Tip:** Data manipulation (filtering, balancing, prompt format) often gives the biggest gain."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "HF_USER = \"sheriff\"\n",
+        "DATASET_NAME = \"ed-donner/pricer-data\"\n",
+        "BASE_MODEL = \"meta-llama/Llama-3.2-3B\"\n",
+        "PROJECT_NAME = \"pricer\"\n",
+        "\n",
+        "# --- Hyperparameters (tune these to beat 39.85) ---\n",
+        "NUM_EPOCHS = 2\n",
+        "LEARNING_RATE = 2e-4\n",
+        "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n",
+        "GRADIENT_ACCUMULATION_STEPS = 4\n",
+        "MAX_SEQ_LENGTH = 256\n",
+        "LORA_R = 8\n",
+        "LORA_ALPHA = 32\n",
+        "LORA_DROPOUT = 0.05\n",
+        "WARMUP_RATIO = 0.05\n",
+        "WEIGHT_DECAY = 0.01\n",
+        "\n",
+        "# Optional: subsample for quick experiments (set to None to use full train)\n",
+        "TRAIN_SUBSAMPLE = None  # e.g. 20000\n",
+        "\n",
+        "EVAL_SIZE = 250  # same as instructor Tester\n",
+        "hf_token = os.environ.get(\"HF_TOKEN\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Load data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "dataset = load_dataset(DATASET_NAME, token=hf_token)\n",
+        "train_raw = dataset[\"train\"]\n",
+        "test_raw = dataset[\"test\"]\n",
+        "print(f\"Train: {len(train_raw)}, Test: {len(test_raw)}\")\n",
+        "print(\"Columns:\", train_raw.column_names)\n",
+        "if len(train_raw) > 0:\n",
+        "    ex = train_raw[0]\n",
+        "    print(\"Sample keys:\", list(ex.keys()))\n",
+        "    if \"text\" in ex:\n",
+        "        print(\"Sample text (first 300 chars):\", (ex[\"text\"] or \"\")[:300])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Prepare training data (prompt + completion)\n",
+        "\n",
+        "**Data manipulation idea:** You can filter by price range, oversample rare buckets, or clean `text` here to teach the model better."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def build_train_text(example):\n",
+        "    # pricer-data: \"text\" is the prompt; model should complete with \"Price is $X\"\n",
+        "    prompt = (example.get(\"text\") or \"\").strip()\n",
+        "    price = example.get(\"price\")\n",
+        "    if price is None:\n",
+        "        return None\n",
+        "    try:\n",
+        "        p = float(price)\n",
+        "    except (TypeError, ValueError):\n",
+        "        return None\n",
+        "    # Completion format expected at eval (extract_price looks for \"Price is $\")\n",
+        "    completion = f\"Price is ${p:.2f}\"\n",
+        "    return prompt + completion\n",
+        "\n",
+        "train_list = []\n",
+        "for i in range(len(train_raw)):\n",
+        "    row = train_raw[i]\n",
+        "    text = build_train_text(row)\n",
+        "    if text:\n",
+        "        train_list.append({\"text\": text})\n",
+        "\n",
+        "if TRAIN_SUBSAMPLE:\n",
+        "    np.random.seed(42)\n",
+        "    idx = np.random.choice(len(train_list), min(TRAIN_SUBSAMPLE, len(train_list)), replace=False)\n",
+        "    train_list = [train_list[i] for i in idx]\n",
+        "\n",
+        "train_ds = Dataset.from_list(train_list)\n",
+        "print(f\"Training samples: {len(train_ds)}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Validation: small subset from train for eval_strategy\n",
+        "val_size = min(500, len(train_ds) // 10)\n",
+        "val_ds = train_ds.select(range(val_size))\n",
+        "train_ds = train_ds.select(range(val_size, len(train_ds)))\n",
+        "print(f\"Train: {len(train_ds)}, Val: {len(val_ds)}\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Model & QLoRA"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "bnb_config = BitsAndBytesConfig(\n",
+        "    load_in_4bit=True,\n",
+        "    bnb_4bit_use_double_quant=True,\n",
+        "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
+        "    bnb_4bit_quant_type=\"nf4\",\n",
+        ")\n",
+        "\n",
+        "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n",
+        "tokenizer.pad_token = tokenizer.eos_token\n",
+        "tokenizer.padding_side = \"right\"\n",
+        "\n",
+        "model = AutoModelForCausalLM.from_pretrained(\n",
+        "    BASE_MODEL,\n",
+        "    quantization_config=bnb_config,\n",
+        "    device_map=\"auto\",\n",
+        ")\n",
+        "model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
+        "model = prepare_model_for_kbit_training(model)\n",
+        "\n",
+        "lora_config = LoraConfig(\n",
+        "    r=LORA_R,\n",
+        "    lora_alpha=LORA_ALPHA,\n",
+        "    target_modules=[\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"],\n",
+        "    lora_dropout=LORA_DROPOUT,\n",
+        "    bias=\"none\",\n",
+        "    task_type=\"CAUSAL_LM\",\n",
+        ")\n",
+        "model = get_peft_model(model, lora_config)\n",
+        "model.print_trainable_parameters()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Completion-only loss: only tokens after \"Price is $\" are trained\n",
+        "RESPONSE_TEMPLATE = \"Price is $\"\n",
+        "collator = DataCollatorForCompletionOnlyLM(RESPONSE_TEMPLATE, tokenizer=tokenizer)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "RUN_NAME = f\"{datetime.now():%Y-%m-%d_%H.%M.%S}\"\n",
+        "OUTPUT_DIR = f\"{PROJECT_NAME}-{RUN_NAME}\"\n",
+        "\n",
+        "training_args = SFTConfig(\n",
+        "    output_dir=OUTPUT_DIR,\n",
+        "    run_name=RUN_NAME,\n",
+        "    dataset_text_field=\"text\",\n",
+        "    max_seq_length=MAX_SEQ_LENGTH,\n",
+        "    num_train_epochs=NUM_EPOCHS,\n",
+        "    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,\n",
+        "    per_device_eval_batch_size=4,\n",
+        "    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n",
+        "    eval_strategy=\"steps\",\n",
+        "    eval_steps=200,\n",
+        "    learning_rate=LEARNING_RATE,\n",
+        "    lr_scheduler_type=\"cosine\",\n",
+        "    warmup_ratio=WARMUP_RATIO,\n",
+        "    optim=\"paged_adamw_32bit\",\n",
+        "    weight_decay=WEIGHT_DECAY,\n",
+        "    bf16=True,\n",
+        "    logging_steps=50,\n",
+        "    save_strategy=\"steps\",\n",
+        "    save_steps=500,\n",
+        "    save_total_limit=2,\n",
+        "    load_best_model_at_end=True,\n",
+        "    metric_for_best_model=\"eval_loss\",\n",
+        "    greater_is_better=False,\n",
+        "    push_to_hub=False,\n",
+        ")\n",
+        "\n",
+        "trainer = SFTTrainer(\n",
+        "    model=model,\n",
+        "    train_dataset=train_ds,\n",
+        "    eval_dataset=val_ds,\n",
+        "    args=training_args,\n",
+        "    data_collator=collator,\n",
+        ")\n",
+        "print(f\"Output dir: {OUTPUT_DIR}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "trainer.train()\n",
+        "trainer.save_model(OUTPUT_DIR)\n",
+        "tokenizer.save_pretrained(OUTPUT_DIR)\n",
+        "print(f\"Saved to {OUTPUT_DIR}\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Evaluation — beat 39.85\n",
+        "\n",
+        "Load the saved adapter (or set `ADAPTER_PATH` to a previous run) and run the same metric as the instructor: **average absolute error** on 250 test samples."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "ADAPTER_PATH = OUTPUT_DIR  # or e.g. \"pricer-2025-03-09_12.00.00\"\n",
+        "\n",
+        "tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH, trust_remote_code=True)\n",
+        "tokenizer.pad_token = tokenizer.eos_token\n",
+        "\n",
+        "base_model = AutoModelForCausalLM.from_pretrained(\n",
+        "    BASE_MODEL,\n",
+        "    quantization_config=bnb_config,\n",
+        "    device_map=\"auto\",\n",
+        ")\n",
+        "model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)\n",
+        "model.eval()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def extract_price(s):\n",
+        "    if \"Price is $\" in s:\n",
+        "        contents = s.split(\"Price is $\")[1].replace(\",\", \"\")\n",
+        "        m = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", contents)\n",
+        "        return float(m.group()) if m else 0.0\n",
+        "    return 0.0\n",
+        "\n",
+        "@torch.no_grad()\n",
+        "def predict(prompt, max_new_tokens=15):\n",
+        "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
+        "    out = model.generate(\n",
+        "        **inputs,\n",
+        "        max_new_tokens=max_new_tokens,\n",
+        "        do_sample=False,\n",
+        "        pad_token_id=tokenizer.eos_token_id,\n",
+        "    )\n",
+        "    full = tokenizer.decode(out[0], skip_special_tokens=True)\n",
+        "    return extract_price(full)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "GREEN, YELLOW, RED, RESET = \"\\033[92m\", \"\\033[93m\", \"\\033[91m\", \"\\033[0m\"\n",
+        "COLOR_MAP = {\"red\": RED, \"orange\": YELLOW, \"green\": GREEN}\n",
+        "\n",
+        "class Tester:\n",
+        "    def __init__(self, predictor, data, title=None, size=250):\n",
+        "        self.predictor = predictor\n",
+        "        self.data = data\n",
+        "        self.title = title or \"Model\"\n",
+        "        self.size = min(size, len(data))\n",
+        "        self.guesses, self.truths, self.errors, self.sles, self.colors = [], [], [], [], []\n",
+        "\n",
+        "    def color_for(self, error, truth):\n",
+        "        if error < 40 or (truth and error / truth < 0.2):\n",
+        "            return \"green\"\n",
+        "        if error < 80 or (truth and error / truth < 0.4):\n",
+        "            return \"orange\"\n",
+        "        return \"red\"\n",
+        "\n",
+        "    def run_datapoint(self, i):\n",
+        "        row = self.data[i]\n",
+        "        prompt = (row.get(\"text\") or \"\").strip()\n",
+        "        truth = float(row.get(\"price\", 0))\n",
+        "        guess = self.predictor(prompt)\n",
+        "        error = abs(guess - truth)\n",
+        "        log_err = math.log(truth + 1) - math.log(guess + 1)\n",
+        "        sle = log_err ** 2\n",
+        "        color = self.color_for(error, truth)\n",
+        "        title = (prompt[:50] + \"...\") if len(prompt) > 50 else prompt\n",
+        "        self.guesses.append(guess)\n",
+        "        self.truths.append(truth)\n",
+        "        self.errors.append(error)\n",
+        "        self.sles.append(sle)\n",
+        "        self.colors.append(color)\n",
+        "        print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:.2f} {title}{RESET}\")\n",
+        "\n",
+        "    def report(self):\n",
+        "        average_error = sum(self.errors) / self.size\n",
+        "        rmsle = math.sqrt(sum(self.sles) / self.size)\n",
+        "        hits = sum(1 for c in self.colors if c == \"green\")\n",
+        "        title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:.2f} Hits={hits/self.size*100:.1f}%\"\n",
+        "        plt.figure(figsize=(12, 8))\n",
+        "        max_val = max(max(self.truths), max(self.guesses))\n",
+        "        plt.plot([0, max_val], [0, max_val], color=\"deepskyblue\", lw=2, alpha=0.6)\n",
+        "        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n",
+        "        plt.xlabel(\"Ground Truth\")\n",
+        "        plt.ylabel(\"Model Estimate\")\n",
+        "        plt.title(title)\n",
+        "        plt.show()\n",
+        "        return average_error\n",
+        "\n",
+        "    def run(self):\n",
+        "        for i in range(self.size):\n",
+        "            self.run_datapoint(i)\n",
+        "        return self.report()\n",
+        "\n",
+        "    @classmethod\n",
+        "    def test(cls, predictor, data, title=None, size=250):\n",
+        "        t = cls(predictor, data, title=title, size=size)\n",
+        "        return t.run()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "test_data = test_raw.select(range(EVAL_SIZE))\n",
+        "avg_error = Tester.test(predict, test_data)\n",
+        "print(f\"\\n>>> Average error: ${avg_error:,.2f}  (instructor baseline: 39.85; goal: < 39, lower 30s)\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": ".venv",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.12.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
diff --git a/week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb b/week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb
new file mode 100644
index 0000000000..f78843b7ce
--- /dev/null
+++ b/week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb
@@ -0,0 +1,348 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Week 8 Exercise: The Price is Right — IbrahimSheriff\n",
+    "\n",
+    "## Overview\n",
+    "This notebook implements a complete Week 8–style pipeline:\n",
+    "- **SpecialistAgent**: Fine-tuned LLM deployed on Modal\n",
+    "- **FrontierAgent**: RAG + frontier model (GPT) with ChromaDB\n",
+    "- **ChromaDB**: Vector store of product embeddings (SentenceTransformer)\n",
+    "- **Ensemble**: Combined predictor (Specialist + Frontier)\n",
+    "- **Evaluation**: Official `evaluate()` on test set (average absolute error)\n",
+    "- **DealAgentFramework + Gradio UI**: Autonomous deal-hunting and table UI\n",
+    "\n",
+    "## Prerequisites\n",
+    "- Modal token set; HuggingFace secret in Modal (`huggingface-secret`)\n",
+    "- Deploy pricer: from **week8** folder run `uv run modal deploy -m pricer_service`\n",
+    "- `.env`: `HF_TOKEN`, `OPENAI_API_KEY` (for FrontierAgent)\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Setup: path and environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import sys\n",
+    "import logging\n",
+    "from pathlib import Path\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv(override=True)\n",
+    "os.environ.setdefault(\"PYTHONIOENCODING\", \"utf-8\")\n",
+    "\n",
+    "notebook_dir = Path.cwd()\n",
+    "week8_root = notebook_dir.parent.parent\n",
+    "if str(week8_root) not in sys.path:\n",
+    "    sys.path.insert(0, str(week8_root))\n",
+    "\n",
+    "logging.getLogger().setLevel(logging.INFO)\n",
+    "print(f\"Week 8 root: {week8_root}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "required = [\"HF_TOKEN\", \"OPENAI_API_KEY\"]\n",
+    "for var in required:\n",
+    "    status = \"SET\" if os.getenv(var) else \"MISSING\"\n",
+    "    print(f\"  {var}: {status}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Load data (Item.from_hub)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.items import Item\n",
+    "from huggingface_hub import login\n",
+    "\n",
+    "hf_token = os.environ.get(\"HF_TOKEN\")\n",
+    "if hf_token:\n",
+    "    login(token=hf_token, add_to_git_credential=False)\n",
+    "\n",
+    "LITE_MODE = False\n",
+    "username = \"ed-donner\"\n",
+    "dataset = f\"{username}/items_lite\" if LITE_MODE else f\"{username}/items_full\"\n",
+    "\n",
+    "train, val, test = Item.from_hub(dataset)\n",
+    "print(f\"Loaded {len(train):,} train, {len(val):,} val, {len(test):,} test items\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. ChromaDB vector store\n",
+    "\n",
+    "The vector store is persisted to `products_vectorstore/` in the current folder so DealAgentFramework can reuse it later. Only the first `N_DOCS` training items are embedded; lower `N_DOCS` for a faster build."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import chromadb\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "from tqdm.notebook import tqdm\n",
+    "\n",
+    "DB = \"products_vectorstore\"\n",
+    "client = chromadb.PersistentClient(path=DB)\n",
+    "collection = client.get_or_create_collection(\"products\")\n",
+    "\n",
+    "def description(item):  # text to embed/price: summary, else title, else the first 500 chars of prompt\n",
+    "    return (item.summary or item.title or \"\").strip() or (getattr(item, \"prompt\", None) or \"\")[:500]\n",
+    "\n",
+    "N_DOCS = 20_000  # cap on how many training items get embedded\n",
+    "existing = collection.count()\n",
+    "print(f\"Collection has {existing} documents.\")\n",
+    "if existing == 0:\n",
+    "    print(f\"Populating with {N_DOCS} items...\")\n",
+    "    encoder = SentenceTransformer(\"sentence-transformers/all-MiniLM-L6-v2\")\n",
+    "    for i in tqdm(range(0, min(N_DOCS, len(train)), 1000)):\n",
+    "        batch = train[i : min(i + 1000, N_DOCS)]  # never embed past N_DOCS when it is not a multiple of 1000\n",
+    "        docs = [description(it) for it in batch]\n",
+    "        vecs = encoder.encode(docs).astype(float).tolist()\n",
+    "        metas = [{\"category\": getattr(it, \"category\", \"\"), \"price\": it.price} for it in batch]\n",
+    "        ids = [f\"doc_{j}\" for j in range(i, i + len(batch))]\n",
+    "        collection.add(ids=ids, documents=docs, embeddings=vecs, metadatas=metas)\n",
+    "    print(f\"Done. Collection count: {collection.count()}\")\n",
+    "else:\n",
+    "    print(\"Using existing vector store.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. SpecialistAgent (Modal fine-tuned pricer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.specialist_agent import SpecialistAgent\n",
+    "\n",
+    "specialist = SpecialistAgent()\n",
+    "sample = test[0]\n",
+    "est = specialist.price(description(sample))\n",
+    "print(f\"Sample: {sample.title[:50]}...\")\n",
+    "print(f\"Actual: ${sample.price:.2f}, Specialist: ${est:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. FrontierAgent (RAG + frontier model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.frontier_agent import FrontierAgent\n",
+    "\n",
+    "frontier = FrontierAgent(collection)\n",
+    "est_f = frontier.price(description(sample))\n",
+    "print(f\"Frontier estimate: ${est_f:.2f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Predictors for evaluator\n",
+    "\n",
+    "Each takes an `Item` and returns a price (for `evaluate(predictor, test)`)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def specialist_predictor(item):  # Item -> price from the Modal-hosted fine-tuned model\n",
+    "    return specialist.price(description(item))\n",
+    "\n",
+    "def frontier_predictor(item):  # Item -> price from the RAG + frontier model\n",
+    "    return frontier.price(description(item))\n",
+    "\n",
+    "def ensemble_predictor(item):  # Item -> equal-weight mean of both agents\n",
+    "    text = description(item)\n",
+    "    s, f = specialist.price(text), frontier.price(text)\n",
+    "    return 0.5 * s + 0.5 * f"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Run evaluation (Week 8 implementation)\n",
+    "\n",
+    "Uses the same `evaluate()` as week8 day2: 200 items, report and charts. This **determines your result**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from agents.evaluator import evaluate\n",
+    "\n",
+    "print(\"=== SpecialistAgent ===\")\n",
+    "evaluate(specialist_predictor, test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== FrontierAgent ===\")\n",
+    "evaluate(frontier_predictor, test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"=== Ensemble (Specialist + Frontier) ===\")\n",
+    "evaluate(ensemble_predictor, test)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Optional: quick run with fewer items\n",
+    "\n",
+    "Uncomment to test with 50 items and 3 workers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# evaluate(specialist_predictor, test, size=50, workers=3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 9. DealAgentFramework + Gradio UI\n",
+    "\n",
+    "Uses the same framework as week8 day5: planning agent, scanner, messenger. The UI shows a table of deals and a button to run one planning cycle. Ensure `products_vectorstore` is in the current directory (we built it above)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gradio as gr\n",
+    "from deal_agent_framework import DealAgentFramework\n",
+    "from agents.deals import Opportunity, Deal\n",
+    "\n",
+    "agent_framework = DealAgentFramework()\n",
+    "agent_framework.init_agents_as_needed()\n",
+    "print(\"DealAgentFramework ready.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_table(opps):  # rows for the deals Dataframe: description, price, estimate, discount, url\n",
+    "    if not opps:\n",
+    "        return [[\"No deals yet\", \"—\", \"—\", \"—\", \"—\"]]\n",
+    "    return [\n",
+    "        [\n",
+    "            opp.deal.product_description[:80] + (\"...\" if len(opp.deal.product_description) > 80 else \"\"),\n",
+    "            f\"${opp.deal.price:.2f}\",\n",
+    "            f\"${opp.estimate:.2f}\",\n",
+    "            f\"${opp.discount:.2f}\",\n",
+    "            opp.deal.url or \"—\",\n",
+    "        ]\n",
+    "        for opp in opps\n",
+    "    ]\n",
+    "\n",
+    "def run_one_cycle():\n",
+    "    agent_framework.run()\n",
+    "    return get_table(agent_framework.memory)\n",
+    "\n",
+    "with gr.Blocks(title=\"The Price is Right\", fill_width=True) as ui:\n",
+    "    gr.Markdown(\"<div style='text-align: center; font-size: 24px'>The Price is Right — IbrahimSheriff</div>\")\n",
+    "    gr.Markdown(\"Deals surfaced by the autonomous agent (Specialist + Frontier + Scanner + Planner).\")\n",
+    "    with gr.Row():\n",
+    "        run_btn = gr.Button(\"Run one planning cycle\")\n",
+    "    tbl = gr.Dataframe(\n",
+    "        headers=[\"Description\", \"Price\", \"Estimate\", \"Discount\", \"URL\"],\n",
+    "        wrap=True,\n",
+    "        row_count=10,\n",
+    "        col_count=5,\n",
+    "        max_height=400,\n",
+    "    )\n",
+    "    run_btn.click(fn=run_one_cycle, outputs=[tbl])\n",
+    "    ui.load(fn=lambda: get_table(agent_framework.memory), outputs=[tbl])\n",
+    "\n",
+    "ui.launch(inbrowser=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From 7f0c131b4a01b7aaf601d4f277cc40c7836b3e04 Mon Sep 17 00:00:00 2001
From: Hope Ogbons <hopeogbons@gmail.com>
Date: Fri, 20 Mar 2026 14:16:31 +0100
Subject: [PATCH 2/2] Remove Week 7 exercise for IbrahimSheriff from Week 8
 branch

---
 .../IbrahimSheriff/pricer_beat_39.ipynb       | 432 ------------------
 1 file changed, 432 deletions(-)
 delete mode 100644 week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb

diff --git a/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb b/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb
deleted file mode 100644
index 210c2b5617..0000000000
--- a/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb
+++ /dev/null
@@ -1,432 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "# Week 7: Beat the 39 — Price prediction (IbrahimSheriff2)\n",
-        "\n",
-        "**Goal:** Get average absolute error **below 39.85** (ideally into the lower 30s).  \n",
-        "**Metric:** Same as instructor — average $ error on 250 test samples.  \n",
-        "**HF user:** `sheriff`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "%pip install -q pandas scikit-learn datasets transformers torch peft bitsandbytes trl accelerate matplotlib python-dotenv"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import os\n",
-        "import re\n",
-        "import math\n",
-        "from pathlib import Path\n",
-        "from datetime import datetime\n",
-        "import torch\n",
-        "import numpy as np\n",
-        "from tqdm import tqdm\n",
-        "from dotenv import load_dotenv\n",
-        "from datasets import load_dataset, Dataset\n",
-        "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n",
-        "from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model, PeftModel\n",
-        "from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM\n",
-        "import matplotlib.pyplot as plt\n",
-        "\n",
-        "load_dotenv(override=True)\n",
-        "set_seed(42)\n",
-        "%matplotlib inline"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Config (hyperparameters to tune)\n",
-        "\n",
-        "**Instructor baseline:** 39.85. Try varying these to get into the lower 30s.  \n",
-        "**Tip:** Data manipulation (filtering, balancing, prompt format) often gives the biggest gain."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "HF_USER = \"sheriff\"\n",
-        "DATASET_NAME = \"ed-donner/pricer-data\"\n",
-        "BASE_MODEL = \"meta-llama/Llama-3.2-3B\"\n",
-        "PROJECT_NAME = \"pricer\"\n",
-        "\n",
-        "# --- Hyperparameters (tune these to beat 39.85) ---\n",
-        "NUM_EPOCHS = 2\n",
-        "LEARNING_RATE = 2e-4\n",
-        "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n",
-        "GRADIENT_ACCUMULATION_STEPS = 4\n",
-        "MAX_SEQ_LENGTH = 256\n",
-        "LORA_R = 8\n",
-        "LORA_ALPHA = 32\n",
-        "LORA_DROPOUT = 0.05\n",
-        "WARMUP_RATIO = 0.05\n",
-        "WEIGHT_DECAY = 0.01\n",
-        "\n",
-        "# Optional: subsample for quick experiments (set to None to use full train)\n",
-        "TRAIN_SUBSAMPLE = None  # e.g. 20000\n",
-        "\n",
-        "EVAL_SIZE = 250  # same as instructor Tester\n",
-        "hf_token = os.environ.get(\"HF_TOKEN\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Load data"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "dataset = load_dataset(DATASET_NAME, token=hf_token)\n",
-        "train_raw = dataset[\"train\"]\n",
-        "test_raw = dataset[\"test\"]\n",
-        "print(f\"Train: {len(train_raw)}, Test: {len(test_raw)}\")\n",
-        "print(\"Columns:\", train_raw.column_names)\n",
-        "if len(train_raw) > 0:\n",
-        "    ex = train_raw[0]\n",
-        "    print(\"Sample keys:\", list(ex.keys()))\n",
-        "    if \"text\" in ex:\n",
-        "        print(\"Sample text (first 300 chars):\", (ex[\"text\"] or \"\")[:300])"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Prepare training data (prompt + completion)\n",
-        "\n",
-        "**Data manipulation idea:** You can filter by price range, oversample rare buckets, or clean `text` here to teach the model better."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "def build_train_text(example):\n",
-        "    # pricer-data: \"text\" is the prompt; model should complete with \"Price is $X\"\n",
-        "    prompt = (example.get(\"text\") or \"\").strip()\n",
-        "    price = example.get(\"price\")\n",
-        "    if price is None:\n",
-        "        return None\n",
-        "    try:\n",
-        "        p = float(price)\n",
-        "    except (TypeError, ValueError):\n",
-        "        return None\n",
-        "    # Completion format expected at eval (extract_price looks for \"Price is $\")\n",
-        "    completion = f\"Price is ${p:.2f}\"\n",
-        "    return prompt + completion\n",
-        "\n",
-        "train_list = []\n",
-        "for i in range(len(train_raw)):\n",
-        "    row = train_raw[i]\n",
-        "    text = build_train_text(row)\n",
-        "    if text:\n",
-        "        train_list.append({\"text\": text})\n",
-        "\n",
-        "if TRAIN_SUBSAMPLE:\n",
-        "    np.random.seed(42)\n",
-        "    idx = np.random.choice(len(train_list), min(TRAIN_SUBSAMPLE, len(train_list)), replace=False)\n",
-        "    train_list = [train_list[i] for i in idx]\n",
-        "\n",
-        "train_ds = Dataset.from_list(train_list)\n",
-        "print(f\"Training samples: {len(train_ds)}\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# Validation: small subset from train for eval_strategy\n",
-        "val_size = min(500, len(train_ds) // 10)\n",
-        "val_ds = train_ds.select(range(val_size))\n",
-        "train_ds = train_ds.select(range(val_size, len(train_ds)))\n",
-        "print(f\"Train: {len(train_ds)}, Val: {len(val_ds)}\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Model & QLoRA"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "bnb_config = BitsAndBytesConfig(\n",
-        "    load_in_4bit=True,\n",
-        "    bnb_4bit_use_double_quant=True,\n",
-        "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
-        "    bnb_4bit_quant_type=\"nf4\",\n",
-        ")\n",
-        "\n",
-        "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n",
-        "tokenizer.pad_token = tokenizer.eos_token\n",
-        "tokenizer.padding_side = \"right\"\n",
-        "\n",
-        "model = AutoModelForCausalLM.from_pretrained(\n",
-        "    BASE_MODEL,\n",
-        "    quantization_config=bnb_config,\n",
-        "    device_map=\"auto\",\n",
-        ")\n",
-        "model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
-        "model = prepare_model_for_kbit_training(model)\n",
-        "\n",
-        "lora_config = LoraConfig(\n",
-        "    r=LORA_R,\n",
-        "    lora_alpha=LORA_ALPHA,\n",
-        "    target_modules=[\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"],\n",
-        "    lora_dropout=LORA_DROPOUT,\n",
-        "    bias=\"none\",\n",
-        "    task_type=\"CAUSAL_LM\",\n",
-        ")\n",
-        "model = get_peft_model(model, lora_config)\n",
-        "model.print_trainable_parameters()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# Completion-only loss: only tokens after \"Price is $\" are trained\n",
-        "RESPONSE_TEMPLATE = \"Price is $\"\n",
-        "collator = DataCollatorForCompletionOnlyLM(RESPONSE_TEMPLATE, tokenizer=tokenizer)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "RUN_NAME = f\"{datetime.now():%Y-%m-%d_%H.%M.%S}\"\n",
-        "OUTPUT_DIR = f\"{PROJECT_NAME}-{RUN_NAME}\"\n",
-        "\n",
-        "training_args = SFTConfig(\n",
-        "    output_dir=OUTPUT_DIR,\n",
-        "    run_name=RUN_NAME,\n",
-        "    dataset_text_field=\"text\",\n",
-        "    max_seq_length=MAX_SEQ_LENGTH,\n",
-        "    num_train_epochs=NUM_EPOCHS,\n",
-        "    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,\n",
-        "    per_device_eval_batch_size=4,\n",
-        "    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n",
-        "    eval_strategy=\"steps\",\n",
-        "    eval_steps=200,\n",
-        "    learning_rate=LEARNING_RATE,\n",
-        "    lr_scheduler_type=\"cosine\",\n",
-        "    warmup_ratio=WARMUP_RATIO,\n",
-        "    optim=\"paged_adamw_32bit\",\n",
-        "    weight_decay=WEIGHT_DECAY,\n",
-        "    bf16=True,\n",
-        "    logging_steps=50,\n",
-        "    save_strategy=\"steps\",\n",
-        "    save_steps=500,\n",
-        "    save_total_limit=2,\n",
-        "    load_best_model_at_end=True,\n",
-        "    metric_for_best_model=\"eval_loss\",\n",
-        "    greater_is_better=False,\n",
-        "    push_to_hub=False,\n",
-        ")\n",
-        "\n",
-        "trainer = SFTTrainer(\n",
-        "    model=model,\n",
-        "    train_dataset=train_ds,\n",
-        "    eval_dataset=val_ds,\n",
-        "    args=training_args,\n",
-        "    data_collator=collator,\n",
-        ")\n",
-        "print(f\"Output dir: {OUTPUT_DIR}\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "trainer.train()\n",
-        "trainer.save_model(OUTPUT_DIR)\n",
-        "tokenizer.save_pretrained(OUTPUT_DIR)\n",
-        "print(f\"Saved to {OUTPUT_DIR}\")"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Evaluation — beat 39.85\n",
-        "\n",
-        "Load the saved adapter (or set `ADAPTER_PATH` to a previous run) and run the same metric as the instructor: **average absolute error** on 250 test samples."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "ADAPTER_PATH = OUTPUT_DIR  # or e.g. \"pricer-2025-03-09_12.00.00\"\n",
-        "\n",
-        "tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH, trust_remote_code=True)\n",
-        "tokenizer.pad_token = tokenizer.eos_token\n",
-        "\n",
-        "base_model = AutoModelForCausalLM.from_pretrained(\n",
-        "    BASE_MODEL,\n",
-        "    quantization_config=bnb_config,\n",
-        "    device_map=\"auto\",\n",
-        ")\n",
-        "model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)\n",
-        "model.eval()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "def extract_price(s):\n",
-        "    if \"Price is $\" in s:\n",
-        "        contents = s.split(\"Price is $\")[1].replace(\",\", \"\")\n",
-        "        m = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", contents)\n",
-        "        return float(m.group()) if m else 0.0\n",
-        "    return 0.0\n",
-        "\n",
-        "@torch.no_grad()\n",
-        "def predict(prompt, max_new_tokens=15):\n",
-        "    inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n",
-        "    out = model.generate(\n",
-        "        **inputs,\n",
-        "        max_new_tokens=max_new_tokens,\n",
-        "        do_sample=False,\n",
-        "        pad_token_id=tokenizer.eos_token_id,\n",
-        "    )\n",
-        "    full = tokenizer.decode(out[0], skip_special_tokens=True)\n",
-        "    return extract_price(full)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "GREEN, YELLOW, RED, RESET = \"\\033[92m\", \"\\033[93m\", \"\\033[91m\", \"\\033[0m\"\n",
-        "COLOR_MAP = {\"red\": RED, \"orange\": YELLOW, \"green\": GREEN}\n",
-        "\n",
-        "class Tester:\n",
-        "    def __init__(self, predictor, data, title=None, size=250):\n",
-        "        self.predictor = predictor\n",
-        "        self.data = data\n",
-        "        self.title = title or \"Model\"\n",
-        "        self.size = min(size, len(data))\n",
-        "        self.guesses, self.truths, self.errors, self.sles, self.colors = [], [], [], [], []\n",
-        "\n",
-        "    def color_for(self, error, truth):\n",
-        "        if error < 40 or (truth and error / truth < 0.2):\n",
-        "            return \"green\"\n",
-        "        if error < 80 or (truth and error / truth < 0.4):\n",
-        "            return \"orange\"\n",
-        "        return \"red\"\n",
-        "\n",
-        "    def run_datapoint(self, i):\n",
-        "        row = self.data[i]\n",
-        "        prompt = (row.get(\"text\") or \"\").strip()\n",
-        "        truth = float(row.get(\"price\", 0))\n",
-        "        guess = self.predictor(prompt)\n",
-        "        error = abs(guess - truth)\n",
-        "        log_err = math.log(truth + 1) - math.log(guess + 1)\n",
-        "        sle = log_err ** 2\n",
-        "        color = self.color_for(error, truth)\n",
-        "        title = (prompt[:50] + \"...\") if len(prompt) > 50 else prompt\n",
-        "        self.guesses.append(guess)\n",
-        "        self.truths.append(truth)\n",
-        "        self.errors.append(error)\n",
-        "        self.sles.append(sle)\n",
-        "        self.colors.append(color)\n",
-        "        print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:.2f} {title}{RESET}\")\n",
-        "\n",
-        "    def report(self):\n",
-        "        average_error = sum(self.errors) / self.size\n",
-        "        rmsle = math.sqrt(sum(self.sles) / self.size)\n",
-        "        hits = sum(1 for c in self.colors if c == \"green\")\n",
-        "        title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:.2f} Hits={hits/self.size*100:.1f}%\"\n",
-        "        plt.figure(figsize=(12, 8))\n",
-        "        max_val = max(max(self.truths), max(self.guesses))\n",
-        "        plt.plot([0, max_val], [0, max_val], color=\"deepskyblue\", lw=2, alpha=0.6)\n",
-        "        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n",
-        "        plt.xlabel(\"Ground Truth\")\n",
-        "        plt.ylabel(\"Model Estimate\")\n",
-        "        plt.title(title)\n",
-        "        plt.show()\n",
-        "        return average_error\n",
-        "\n",
-        "    def run(self):\n",
-        "        for i in range(self.size):\n",
-        "            self.run_datapoint(i)\n",
-        "        return self.report()\n",
-        "\n",
-        "    @classmethod\n",
-        "    def test(cls, predictor, data, title=None, size=250):\n",
-        "        t = cls(predictor, data, title=title, size=size)\n",
-        "        return t.run()"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "test_data = test_raw.select(range(EVAL_SIZE))\n",
-        "avg_error = Tester.test(predict, test_data)\n",
-        "print(f\"\\n>>> Average error: ${avg_error:,.2f}  (instructor baseline: 39.85; goal: < 39, lower 30s)\")"
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": ".venv",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.12.12"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}