From a5df4c6db3ce1b1940c118f8d6c4818084ab2df1 Mon Sep 17 00:00:00 2001 From: "Sheriff .n Ibrahim" Date: Mon, 9 Mar 2026 14:31:36 +0100 Subject: [PATCH 1/2] Add Week 8 exercise for IbrahimSheriff: SpecialistAgent, FrontierAgent, ChromaDB, ensemble evaluation, and Gradio UI --- .../IbrahimSheriff/pricer_beat_39.ipynb | 432 ++++++++++++++++++ .../IbrahimSheriff/week8_exercise.ipynb | 348 ++++++++++++++ 2 files changed, 780 insertions(+) create mode 100644 week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb create mode 100644 week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb diff --git a/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb b/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb new file mode 100644 index 0000000000..210c2b5617 --- /dev/null +++ b/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb @@ -0,0 +1,432 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Week 7: Beat the 39 — Price prediction (IbrahimSheriff2)\n", + "\n", + "**Goal:** Get average absolute error **below 39.85** (ideally into the lower 30s). \n", + "**Metric:** Same as instructor — average $ error on 250 test samples. 
\n", + "**HF user:** `sheriff`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q pandas scikit-learn datasets transformers torch peft bitsandbytes trl accelerate matplotlib python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "import math\n", + "from pathlib import Path\n", + "from datetime import datetime\n", + "import torch\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from dotenv import load_dotenv\n", + "from datasets import load_dataset, Dataset\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n", + "from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model, PeftModel\n", + "from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM\n", + "import matplotlib.pyplot as plt\n", + "\n", + "load_dotenv(override=True)\n", + "set_seed(42)\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Config (hyperparameters to tune)\n", + "\n", + "**Instructor baseline:** 39.85. Try varying these to get into the lower 30s. \n", + "**Tip:** Data manipulation (filtering, balancing, prompt format) often gives the biggest gain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HF_USER = \"sheriff\"\n", + "DATASET_NAME = \"ed-donner/pricer-data\"\n", + "BASE_MODEL = \"meta-llama/Llama-3.2-3B\"\n", + "PROJECT_NAME = \"pricer\"\n", + "\n", + "# --- Hyperparameters (tune these to beat 39.85) ---\n", + "NUM_EPOCHS = 2\n", + "LEARNING_RATE = 2e-4\n", + "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n", + "GRADIENT_ACCUMULATION_STEPS = 4\n", + "MAX_SEQ_LENGTH = 256\n", + "LORA_R = 8\n", + "LORA_ALPHA = 32\n", + "LORA_DROPOUT = 0.05\n", + "WARMUP_RATIO = 0.05\n", + "WEIGHT_DECAY = 0.01\n", + "\n", + "# Optional: subsample for quick experiments (set to None to use full train)\n", + "TRAIN_SUBSAMPLE = None # e.g. 20000\n", + "\n", + "EVAL_SIZE = 250 # same as instructor Tester\n", + "hf_token = os.environ.get(\"HF_TOKEN\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = load_dataset(DATASET_NAME, token=hf_token)\n", + "train_raw = dataset[\"train\"]\n", + "test_raw = dataset[\"test\"]\n", + "print(f\"Train: {len(train_raw)}, Test: {len(test_raw)}\")\n", + "print(\"Columns:\", train_raw.column_names)\n", + "if len(train_raw) > 0:\n", + " ex = train_raw[0]\n", + " print(\"Sample keys:\", list(ex.keys()))\n", + " if \"text\" in ex:\n", + " print(\"Sample text (first 300 chars):\", (ex[\"text\"] or \"\")[:300])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare training data (prompt + completion)\n", + "\n", + "**Data manipulation idea:** You can filter by price range, oversample rare buckets, or clean `text` here to teach the model better." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def build_train_text(example):\n", + " # pricer-data: \"text\" is the prompt; model should complete with \"Price is $X\"\n", + " prompt = (example.get(\"text\") or \"\").strip()\n", + " price = example.get(\"price\")\n", + " if price is None:\n", + " return None\n", + " try:\n", + " p = float(price)\n", + " except (TypeError, ValueError):\n", + " return None\n", + " # Completion format expected at eval (extract_price looks for \"Price is $\")\n", + " completion = f\"Price is ${p:.2f}\"\n", + " return prompt + completion\n", + "\n", + "train_list = []\n", + "for i in range(len(train_raw)):\n", + " row = train_raw[i]\n", + " text = build_train_text(row)\n", + " if text:\n", + " train_list.append({\"text\": text})\n", + "\n", + "if TRAIN_SUBSAMPLE:\n", + " np.random.seed(42)\n", + " idx = np.random.choice(len(train_list), min(TRAIN_SUBSAMPLE, len(train_list)), replace=False)\n", + " train_list = [train_list[i] for i in idx]\n", + "\n", + "train_ds = Dataset.from_list(train_list)\n", + "print(f\"Training samples: {len(train_ds)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Validation: small subset from train for eval_strategy\n", + "val_size = min(500, len(train_ds) // 10)\n", + "val_ds = train_ds.select(range(val_size))\n", + "train_ds = train_ds.select(range(val_size, len(train_ds)))\n", + "print(f\"Train: {len(train_ds)}, Val: {len(val_ds)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model & QLoRA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bnb_config = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_use_double_quant=True,\n", + " bnb_4bit_compute_dtype=torch.bfloat16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + ")\n", + "\n", + "tokenizer = 
AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "tokenizer.padding_side = \"right\"\n", + "\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL,\n", + " quantization_config=bnb_config,\n", + " device_map=\"auto\",\n", + ")\n", + "model.generation_config.pad_token_id = tokenizer.pad_token_id\n", + "model = prepare_model_for_kbit_training(model)\n", + "\n", + "lora_config = LoraConfig(\n", + " r=LORA_R,\n", + " lora_alpha=LORA_ALPHA,\n", + " target_modules=[\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"],\n", + " lora_dropout=LORA_DROPOUT,\n", + " bias=\"none\",\n", + " task_type=\"CAUSAL_LM\",\n", + ")\n", + "model = get_peft_model(model, lora_config)\n", + "model.print_trainable_parameters()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Completion-only loss: only tokens after \"Price is $\" are trained\n", + "RESPONSE_TEMPLATE = \"Price is $\"\n", + "collator = DataCollatorForCompletionOnlyLM(RESPONSE_TEMPLATE, tokenizer=tokenizer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RUN_NAME = f\"{datetime.now():%Y-%m-%d_%H.%M.%S}\"\n", + "OUTPUT_DIR = f\"{PROJECT_NAME}-{RUN_NAME}\"\n", + "\n", + "training_args = SFTConfig(\n", + " output_dir=OUTPUT_DIR,\n", + " run_name=RUN_NAME,\n", + " dataset_text_field=\"text\",\n", + " max_seq_length=MAX_SEQ_LENGTH,\n", + " num_train_epochs=NUM_EPOCHS,\n", + " per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,\n", + " per_device_eval_batch_size=4,\n", + " gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n", + " eval_strategy=\"steps\",\n", + " eval_steps=200,\n", + " learning_rate=LEARNING_RATE,\n", + " lr_scheduler_type=\"cosine\",\n", + " warmup_ratio=WARMUP_RATIO,\n", + " optim=\"paged_adamw_32bit\",\n", + " weight_decay=WEIGHT_DECAY,\n", + " bf16=True,\n", + " logging_steps=50,\n", + 
" save_strategy=\"steps\",\n", + " save_steps=500,\n", + " save_total_limit=2,\n", + " load_best_model_at_end=True,\n", + " metric_for_best_model=\"eval_loss\",\n", + " greater_is_better=False,\n", + " push_to_hub=False,\n", + ")\n", + "\n", + "trainer = SFTTrainer(\n", + " model=model,\n", + " train_dataset=train_ds,\n", + " eval_dataset=val_ds,\n", + " args=training_args,\n", + " data_collator=collator,\n", + ")\n", + "print(f\"Output dir: {OUTPUT_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trainer.train()\n", + "trainer.save_model(OUTPUT_DIR)\n", + "tokenizer.save_pretrained(OUTPUT_DIR)\n", + "print(f\"Saved to {OUTPUT_DIR}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation — beat 39.85\n", + "\n", + "Load the saved adapter (or set `ADAPTER_PATH` to a previous run) and run the same metric as the instructor: **average absolute error** on 250 test samples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ADAPTER_PATH = OUTPUT_DIR # or e.g. 
\"pricer-2025-03-09_12.00.00\"\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH, trust_remote_code=True)\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "\n", + "base_model = AutoModelForCausalLM.from_pretrained(\n", + " BASE_MODEL,\n", + " quantization_config=bnb_config,\n", + " device_map=\"auto\",\n", + ")\n", + "model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)\n", + "model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_price(s):\n", + " if \"Price is $\" in s:\n", + " contents = s.split(\"Price is $\")[1].replace(\",\", \"\")\n", + " m = re.search(r\"[-+]?\\d*\\.\\d+|\\d+\", contents)\n", + " return float(m.group()) if m else 0.0\n", + " return 0.0\n", + "\n", + "@torch.no_grad()\n", + "def predict(prompt, max_new_tokens=15):\n", + " inputs = tokenizer(prompt, return_tensors=\"pt\").to(model.device)\n", + " out = model.generate(\n", + " **inputs,\n", + " max_new_tokens=max_new_tokens,\n", + " do_sample=False,\n", + " pad_token_id=tokenizer.eos_token_id,\n", + " )\n", + " full = tokenizer.decode(out[0], skip_special_tokens=True)\n", + " return extract_price(full)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "GREEN, YELLOW, RED, RESET = \"\\033[92m\", \"\\033[93m\", \"\\033[91m\", \"\\033[0m\"\n", + "COLOR_MAP = {\"red\": RED, \"orange\": YELLOW, \"green\": GREEN}\n", + "\n", + "class Tester:\n", + " def __init__(self, predictor, data, title=None, size=250):\n", + " self.predictor = predictor\n", + " self.data = data\n", + " self.title = title or \"Model\"\n", + " self.size = min(size, len(data))\n", + " self.guesses, self.truths, self.errors, self.sles, self.colors = [], [], [], [], []\n", + "\n", + " def color_for(self, error, truth):\n", + " if error < 40 or (truth and error / truth < 0.2):\n", + " return \"green\"\n", + " if error < 80 or (truth and error / truth < 
0.4):\n", + " return \"orange\"\n", + " return \"red\"\n", + "\n", + " def run_datapoint(self, i):\n", + " row = self.data[i]\n", + " prompt = (row.get(\"text\") or \"\").strip()\n", + " truth = float(row.get(\"price\", 0))\n", + " guess = self.predictor(prompt)\n", + " error = abs(guess - truth)\n", + " log_err = math.log(truth + 1) - math.log(guess + 1)\n", + " sle = log_err ** 2\n", + " color = self.color_for(error, truth)\n", + " title = (prompt[:50] + \"...\") if len(prompt) > 50 else prompt\n", + " self.guesses.append(guess)\n", + " self.truths.append(truth)\n", + " self.errors.append(error)\n", + " self.sles.append(sle)\n", + " self.colors.append(color)\n", + " print(f\"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:.2f} {title}{RESET}\")\n", + "\n", + " def report(self):\n", + " average_error = sum(self.errors) / self.size\n", + " rmsle = math.sqrt(sum(self.sles) / self.size)\n", + " hits = sum(1 for c in self.colors if c == \"green\")\n", + " title = f\"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:.2f} Hits={hits/self.size*100:.1f}%\"\n", + " plt.figure(figsize=(12, 8))\n", + " max_val = max(max(self.truths), max(self.guesses))\n", + " plt.plot([0, max_val], [0, max_val], color=\"deepskyblue\", lw=2, alpha=0.6)\n", + " plt.scatter(self.truths, self.guesses, s=3, c=self.colors)\n", + " plt.xlabel(\"Ground Truth\")\n", + " plt.ylabel(\"Model Estimate\")\n", + " plt.title(title)\n", + " plt.show()\n", + " return average_error\n", + "\n", + " def run(self):\n", + " for i in range(self.size):\n", + " self.run_datapoint(i)\n", + " return self.report()\n", + "\n", + " @classmethod\n", + " def test(cls, predictor, data, title=None, size=250):\n", + " t = cls(predictor, data, title=title, size=size)\n", + " return t.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = test_raw.select(range(EVAL_SIZE))\n", + "avg_error = 
Tester.test(predict, test_data)\n", + "print(f\"\\n>>> Average error: ${avg_error:,.2f} (instructor baseline: 39.85; goal: < 39, lower 30s)\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb b/week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb new file mode 100644 index 0000000000..f78843b7ce --- /dev/null +++ b/week8/community_contributions/IbrahimSheriff/week8_exercise.ipynb @@ -0,0 +1,348 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Week 8 Exercise: The Price is Right — IbrahimSheriff\n", + "\n", + "## Overview\n", + "This notebook implements a complete Week 8–style pipeline:\n", + "- **SpecialistAgent**: Fine-tuned LLM deployed on Modal\n", + "- **FrontierAgent**: RAG + frontier model (GPT) with ChromaDB\n", + "- **ChromaDB**: Vector store of product embeddings (SentenceTransformer)\n", + "- **Ensemble**: Combined predictor (Specialist + Frontier)\n", + "- **Evaluation**: Official `evaluate()` on test set (average absolute error)\n", + "- **DealAgentFramework + Gradio UI**: Autonomous deal-hunting and table UI\n", + "\n", + "## Prerequisites\n", + "- Modal token set; HuggingFace secret in Modal (`huggingface-secret`)\n", + "- Deploy pricer: from **week8** folder run `uv run modal deploy -m pricer_service`\n", + "- `.env`: `HF_TOKEN`, `OPENAI_API_KEY` (for FrontierAgent)\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
Setup: path and environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import logging\n", + "from pathlib import Path\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv(override=True)\n", + "os.environ.setdefault(\"PYTHONIOENCODING\", \"utf-8\")\n", + "\n", + "notebook_dir = Path.cwd()\n", + "week8_root = notebook_dir.parent.parent\n", + "if str(week8_root) not in sys.path:\n", + " sys.path.insert(0, str(week8_root))\n", + "\n", + "logging.getLogger().setLevel(logging.INFO)\n", + "print(f\"Week 8 root: {week8_root}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "required = [\"HF_TOKEN\", \"OPENAI_API_KEY\"]\n", + "for var in required:\n", + " status = \"SET\" if os.getenv(var) else \"MISSING\"\n", + " print(f\" {var}: {status}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Load data (Item.from_hub)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from agents.items import Item\n", + "from huggingface_hub import login\n", + "\n", + "hf_token = os.environ.get(\"HF_TOKEN\")\n", + "if hf_token:\n", + " login(token=hf_token, add_to_git_credential=False)\n", + "\n", + "LITE_MODE = False\n", + "username = \"ed-donner\"\n", + "dataset = f\"{username}/items_lite\" if LITE_MODE else f\"{username}/items_full\"\n", + "\n", + "train, val, test = Item.from_hub(dataset)\n", + "print(f\"Loaded {len(train):,} train, {len(val):,} val, {len(test):,} test items\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. ChromaDB vector store\n", + "\n", + "We build the vector store in the current folder so DealAgentFramework can use it later. Use a subset for speed if you prefer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import chromadb\n", + "from sentence_transformers import SentenceTransformer\n", + "from tqdm.notebook import tqdm\n", + "\n", + "DB = \"products_vectorstore\"\n", + "client = chromadb.PersistentClient(path=DB)\n", + "collection = client.get_or_create_collection(\"products\")\n", + "\n", + "def description(item):\n", + " return (item.summary or item.title or \"\").strip() or getattr(item, \"prompt\", \"\")[:500]\n", + "\n", + "N_DOCS = 20_000\n", + "existing = collection.count()\n", + "print(f\"Collection has {existing} documents.\")\n", + "if existing == 0:\n", + " print(f\"Populating with {N_DOCS} items...\")\n", + " encoder = SentenceTransformer(\"sentence-transformers/all-MiniLM-L6-v2\")\n", + " for i in tqdm(range(0, min(N_DOCS, len(train)), 1000)):\n", + " batch = train[i : i + 1000]\n", + " docs = [description(it) for it in batch]\n", + " vecs = encoder.encode(docs).astype(float).tolist()\n", + " metas = [{\"category\": getattr(it, \"category\", \"\"), \"price\": it.price} for it in batch]\n", + " ids = [f\"doc_{j}\" for j in range(i, i + len(batch))]\n", + " collection.add(ids=ids, documents=docs, embeddings=vecs, metadatas=metas)\n", + " print(f\"Done. Collection count: {collection.count()}\")\n", + "else:\n", + " print(\"Using existing vector store.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
SpecialistAgent (Modal fine-tuned pricer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from agents.specialist_agent import SpecialistAgent\n", + "\n", + "specialist = SpecialistAgent()\n", + "sample = test[0]\n", + "est = specialist.price(description(sample))\n", + "print(f\"Sample: {sample.title[:50]}...\")\n", + "print(f\"Actual: ${sample.price:.2f}, Specialist: ${est:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. FrontierAgent (RAG + frontier model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from agents.frontier_agent import FrontierAgent\n", + "\n", + "frontier = FrontierAgent(collection)\n", + "est_f = frontier.price(description(sample))\n", + "print(f\"Frontier estimate: ${est_f:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Predictors for evaluator\n", + "\n", + "Each takes an `Item` and returns a price (for `evaluate(predictor, test)`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def specialist_predictor(item):\n", + " return specialist.price(description(item))\n", + "\n", + "def frontier_predictor(item):\n", + " return frontier.price(description(item))\n", + "\n", + "def ensemble_predictor(item):\n", + " s = specialist.price(description(item))\n", + " f = frontier.price(description(item))\n", + " return 0.5 * s + 0.5 * f" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 7. Run evaluation (Week 8 implementation)\n", + "\n", + "Uses the same `evaluate()` as week8 day2: 200 items, report and charts. This **determines your result**." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from agents.evaluator import evaluate\n", + "\n", + "print(\"=== SpecialistAgent ===\")\n", + "evaluate(specialist_predictor, test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=== FrontierAgent ===\")\n", + "evaluate(frontier_predictor, test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"=== Ensemble (Specialist + Frontier) ===\")\n", + "evaluate(ensemble_predictor, test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 8. Optional: quick run with fewer items\n", + "\n", + "Uncomment to test with 50 items and 3 workers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# evaluate(specialist_predictor, test, size=50, workers=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9. DealAgentFramework + Gradio UI\n", + "\n", + "Uses the same framework as week8 day5: planning agent, scanner, messenger. The UI shows a table of deals and a button to run one planning cycle. Ensure `products_vectorstore` is in the current directory (we built it above)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "from deal_agent_framework import DealAgentFramework\n", + "from agents.deals import Opportunity, Deal\n", + "\n", + "agent_framework = DealAgentFramework()\n", + "agent_framework.init_agents_as_needed()\n", + "print(\"DealAgentFramework ready.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_table(opps):\n", + " if not opps:\n", + " return [[\"No deals yet\", \"—\", \"—\", \"—\", \"—\"]]\n", + " return [\n", + " [\n", + " opp.deal.product_description[:80] + (\".\" if len(opp.deal.product_description) > 80 else \"\"),\n", + " f\"${opp.deal.price:.2f}\",\n", + " f\"${opp.estimate:.2f}\",\n", + " f\"${opp.discount:.2f}\",\n", + " opp.deal.url or \"—\",\n", + " ]\n", + " for opp in opps\n", + " ]\n", + "\n", + "def run_one_cycle():\n", + " agent_framework.run()\n", + " return get_table(agent_framework.memory)\n", + "\n", + "with gr.Blocks(title=\"The Price is Right\", fill_width=True) as ui:\n", + " gr.Markdown(\"
The Price is Right — IbrahimSheriff
\")\n", + " gr.Markdown(\"Deals surfaced by the autonomous agent (Specialist + Frontier + Scanner + Planner).\")\n", + " with gr.Row():\n", + " run_btn = gr.Button(\"Run one planning cycle\")\n", + " tbl = gr.Dataframe(\n", + " headers=[\"Description\", \"Price\", \"Estimate\", \"Discount\", \"URL\"],\n", + " wrap=True,\n", + " row_count=10,\n", + " col_count=5,\n", + " max_height=400,\n", + " )\n", + " run_btn.click(fn=run_one_cycle, outputs=[tbl])\n", + " ui.load(fn=lambda: get_table(agent_framework.memory), outputs=[tbl])\n", + "\n", + "ui.launch(inbrowser=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 7f0c131b4a01b7aaf601d4f277cc40c7836b3e04 Mon Sep 17 00:00:00 2001 From: Hope Ogbons Date: Fri, 20 Mar 2026 14:16:31 +0100 Subject: [PATCH 2/2] Remove Week 7 exercise for IbrahimSheriff from Week 8 branch --- .../IbrahimSheriff/pricer_beat_39.ipynb | 432 ------------------ 1 file changed, 432 deletions(-) delete mode 100644 week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb diff --git a/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb b/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb deleted file mode 100644 index 210c2b5617..0000000000 --- a/week7/community_contributions/IbrahimSheriff/pricer_beat_39.ipynb +++ /dev/null @@ -1,432 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Week 7: Beat the 39 — Price prediction (IbrahimSheriff2)\n", - "\n", - "**Goal:** Get average absolute error **below 39.85** (ideally into the lower 30s). \n", - "**Metric:** Same as instructor — average $ error on 250 test samples. 
\n", - "**HF user:** `sheriff`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -q pandas scikit-learn datasets transformers torch peft bitsandbytes trl accelerate matplotlib python-dotenv" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import re\n", - "import math\n", - "from pathlib import Path\n", - "from datetime import datetime\n", - "import torch\n", - "import numpy as np\n", - "from tqdm import tqdm\n", - "from dotenv import load_dotenv\n", - "from datasets import load_dataset, Dataset\n", - "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed\n", - "from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model, PeftModel\n", - "from trl import SFTTrainer, SFTConfig, DataCollatorForCompletionOnlyLM\n", - "import matplotlib.pyplot as plt\n", - "\n", - "load_dotenv(override=True)\n", - "set_seed(42)\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Config (hyperparameters to tune)\n", - "\n", - "**Instructor baseline:** 39.85. Try varying these to get into the lower 30s. \n", - "**Tip:** Data manipulation (filtering, balancing, prompt format) often gives the biggest gain." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "HF_USER = \"sheriff\"\n", - "DATASET_NAME = \"ed-donner/pricer-data\"\n", - "BASE_MODEL = \"meta-llama/Llama-3.2-3B\"\n", - "PROJECT_NAME = \"pricer\"\n", - "\n", - "# --- Hyperparameters (tune these to beat 39.85) ---\n", - "NUM_EPOCHS = 2\n", - "LEARNING_RATE = 2e-4\n", - "PER_DEVICE_TRAIN_BATCH_SIZE = 4\n", - "GRADIENT_ACCUMULATION_STEPS = 4\n", - "MAX_SEQ_LENGTH = 256\n", - "LORA_R = 8\n", - "LORA_ALPHA = 32\n", - "LORA_DROPOUT = 0.05\n", - "WARMUP_RATIO = 0.05\n", - "WEIGHT_DECAY = 0.01\n", - "\n", - "# Optional: subsample for quick experiments (set to None to use full train)\n", - "TRAIN_SUBSAMPLE = None # e.g. 20000\n", - "\n", - "EVAL_SIZE = 250 # same as instructor Tester\n", - "hf_token = os.environ.get(\"HF_TOKEN\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = load_dataset(DATASET_NAME, token=hf_token)\n", - "train_raw = dataset[\"train\"]\n", - "test_raw = dataset[\"test\"]\n", - "print(f\"Train: {len(train_raw)}, Test: {len(test_raw)}\")\n", - "print(\"Columns:\", train_raw.column_names)\n", - "if len(train_raw) > 0:\n", - " ex = train_raw[0]\n", - " print(\"Sample keys:\", list(ex.keys()))\n", - " if \"text\" in ex:\n", - " print(\"Sample text (first 300 chars):\", (ex[\"text\"] or \"\")[:300])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prepare training data (prompt + completion)\n", - "\n", - "**Data manipulation idea:** You can filter by price range, oversample rare buckets, or clean `text` here to teach the model better." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def build_train_text(example):\n", - " # pricer-data: \"text\" is the prompt; model should complete with \"Price is $X\"\n", - " prompt = (example.get(\"text\") or \"\").strip()\n", - " price = example.get(\"price\")\n", - " if price is None:\n", - " return None\n", - " try:\n", - " p = float(price)\n", - " except (TypeError, ValueError):\n", - " return None\n", - " # Completion format expected at eval (extract_price looks for \"Price is $\")\n", - " completion = f\"Price is ${p:.2f}\"\n", - " return prompt + completion\n", - "\n", - "train_list = []\n", - "for i in range(len(train_raw)):\n", - " row = train_raw[i]\n", - " text = build_train_text(row)\n", - " if text:\n", - " train_list.append({\"text\": text})\n", - "\n", - "if TRAIN_SUBSAMPLE:\n", - " np.random.seed(42)\n", - " idx = np.random.choice(len(train_list), min(TRAIN_SUBSAMPLE, len(train_list)), replace=False)\n", - " train_list = [train_list[i] for i in idx]\n", - "\n", - "train_ds = Dataset.from_list(train_list)\n", - "print(f\"Training samples: {len(train_ds)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Validation: small subset from train for eval_strategy\n", - "val_size = min(500, len(train_ds) // 10)\n", - "val_ds = train_ds.select(range(val_size))\n", - "train_ds = train_ds.select(range(val_size, len(train_ds)))\n", - "print(f\"Train: {len(train_ds)}, Val: {len(val_ds)}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Model & QLoRA" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bnb_config = BitsAndBytesConfig(\n", - " load_in_4bit=True,\n", - " bnb_4bit_use_double_quant=True,\n", - " bnb_4bit_compute_dtype=torch.bfloat16,\n", - " bnb_4bit_quant_type=\"nf4\",\n", - ")\n", - "\n", - "tokenizer = 
AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n", - "tokenizer.pad_token = tokenizer.eos_token\n", - "tokenizer.padding_side = \"right\"\n", - "\n", - "model = AutoModelForCausalLM.from_pretrained(\n", - " BASE_MODEL,\n", - " quantization_config=bnb_config,\n", - " device_map=\"auto\",\n", - ")\n", - "model.generation_config.pad_token_id = tokenizer.pad_token_id\n", - "model = prepare_model_for_kbit_training(model)\n", - "\n", - "lora_config = LoraConfig(\n", - " r=LORA_R,\n", - " lora_alpha=LORA_ALPHA,\n", - " target_modules=[\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"],\n", - " lora_dropout=LORA_DROPOUT,\n", - " bias=\"none\",\n", - " task_type=\"CAUSAL_LM\",\n", - ")\n", - "model = get_peft_model(model, lora_config)\n", - "model.print_trainable_parameters()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Completion-only loss: only tokens after \"Price is $\" are trained\n", - "RESPONSE_TEMPLATE = \"Price is $\"\n", - "collator = DataCollatorForCompletionOnlyLM(RESPONSE_TEMPLATE, tokenizer=tokenizer)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "RUN_NAME = f\"{datetime.now():%Y-%m-%d_%H.%M.%S}\"\n", - "OUTPUT_DIR = f\"{PROJECT_NAME}-{RUN_NAME}\"\n", - "\n", - "training_args = SFTConfig(\n", - " output_dir=OUTPUT_DIR,\n", - " run_name=RUN_NAME,\n", - " dataset_text_field=\"text\",\n", - " max_seq_length=MAX_SEQ_LENGTH,\n", - " num_train_epochs=NUM_EPOCHS,\n", - " per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH_SIZE,\n", - " per_device_eval_batch_size=4,\n", - " gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,\n", - " eval_strategy=\"steps\",\n", - " eval_steps=200,\n", - " learning_rate=LEARNING_RATE,\n", - " lr_scheduler_type=\"cosine\",\n", - " warmup_ratio=WARMUP_RATIO,\n", - " optim=\"paged_adamw_32bit\",\n", - " weight_decay=WEIGHT_DECAY,\n", - " bf16=True,\n", - " logging_steps=50,\n", - 
" save_strategy=\"steps\",\n", - " save_steps=500,\n", - " save_total_limit=2,\n", - " load_best_model_at_end=True,\n", - " metric_for_best_model=\"eval_loss\",\n", - " greater_is_better=False,\n", - " push_to_hub=False,\n", - ")\n", - "\n", - "trainer = SFTTrainer(\n", - " model=model,\n", - " train_dataset=train_ds,\n", - " eval_dataset=val_ds,\n", - " args=training_args,\n", - " data_collator=collator,\n", - ")\n", - "print(f\"Output dir: {OUTPUT_DIR}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trainer.train()\n", - "trainer.save_model(OUTPUT_DIR)\n", - "tokenizer.save_pretrained(OUTPUT_DIR)\n", - "print(f\"Saved to {OUTPUT_DIR}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Evaluation — beat 39.85\n", - "\n", - "Load the saved adapter (or set `ADAPTER_PATH` to a previous run) and run the same metric as the instructor: **average absolute error** on 250 test samples." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ADAPTER_PATH = OUTPUT_DIR # or e.g. 
# "pricer-2025-03-09_12.00.00" -- example of an earlier run directory that
# ADAPTER_PATH can be pointed at instead of OUTPUT_DIR.

# Reload the tokenizer that was saved next to the adapter; EOS doubles as the
# pad token, exactly as it did during training.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Load a fresh quantized base model and attach the trained adapter to it.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
model.eval()


def extract_price(s):
    """Return the first number that follows "Price is $" in *s*.

    Thousands separators (",") are removed before parsing. Returns 0.0 when
    the marker is absent or no number follows it.
    """
    if "Price is $" in s:
        contents = s.split("Price is $")[1].replace(",", "")
        m = re.search(r"[-+]?\d*\.\d+|\d+", contents)
        return float(m.group()) if m else 0.0
    return 0.0


@torch.no_grad()
def predict(prompt, max_new_tokens=15):
    """Greedy-decode a continuation of *prompt* and parse the price from it.

    The decoded text contains the prompt followed by the generated tokens;
    `extract_price` pulls the number after the "Price is $" marker.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # deterministic (greedy) decoding
        pad_token_id=tokenizer.eos_token_id,
    )
    full = tokenizer.decode(out[0], skip_special_tokens=True)
    return extract_price(full)


# ANSI escape codes used to colour the per-item console output.
GREEN, YELLOW, RED, RESET = "\033[92m", "\033[93m", "\033[91m", "\033[0m"
COLOR_MAP = {"red": RED, "orange": YELLOW, "green": GREEN}


class Tester:
    """Evaluate a price predictor on the first `size` rows of a dataset.

    Each row is expected to support `.get("text")` (the prompt) and
    `.get("price")` (the ground truth). Per-item results are printed in
    colour and accumulated; `report` plots them and returns the mean
    absolute error.
    """

    def __init__(self, predictor, data, title=None, size=250):
        """Store the predictor and data; `size` is capped at `len(data)`."""
        self.predictor = predictor
        self.data = data
        self.title = title or "Model"
        self.size = min(size, len(data))
        self.guesses, self.truths, self.errors, self.sles, self.colors = [], [], [], [], []

    def color_for(self, error, truth):
        """Classify an error as "green", "orange" or "red".

        Green: under $40 or under 20% of the truth. Orange: under $80 or
        under 40% of the truth. The relative test is skipped when `truth`
        is zero to avoid dividing by it.
        """
        if error < 40 or (truth and error / truth < 0.2):
            return "green"
        if error < 80 or (truth and error / truth < 0.4):
            return "orange"
        return "red"

    def run_datapoint(self, i):
        """Predict row *i*, record guess/truth/error/SLE/colour and print them."""
        row = self.data[i]
        prompt = (row.get("text") or "").strip()
        truth = float(row.get("price", 0))
        guess = self.predictor(prompt)
        error = abs(guess - truth)
        # extract_price accepts a leading "-" sign, so a guess can be negative.
        # log(x + 1) is undefined for x <= -1 and would abort the whole run,
        # so both values are clamped at 0 for the log-error only; the absolute
        # error above still uses the raw guess.
        log_err = math.log1p(max(truth, 0.0)) - math.log1p(max(guess, 0.0))
        sle = log_err ** 2
        color = self.color_for(error, truth)
        title = (prompt[:50] + "...") if len(prompt) > 50 else prompt
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:.2f} {title}{RESET}")

    def report(self):
        """Plot guesses against truths and return the mean absolute error.

        Raises:
            ValueError: if no data point has been evaluated yet.
        """
        # Average over what was actually evaluated, so a partial or repeated
        # run() still yields correct statistics.
        evaluated = len(self.errors)
        if evaluated == 0:
            raise ValueError("No data points were evaluated; nothing to report.")
        average_error = sum(self.errors) / evaluated
        rmsle = math.sqrt(sum(self.sles) / evaluated)
        hits = sum(1 for c in self.colors if c == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:.2f} Hits={hits/evaluated*100:.1f}%"
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        # Diagonal reference line: a perfect predictor would sit on it.
        plt.plot([0, max_val], [0, max_val], color="deepskyblue", lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel("Ground Truth")
        plt.ylabel("Model Estimate")
        plt.title(title)
        plt.show()
        return average_error

    def run(self):
        """Evaluate rows 0..size-1 in order, then return `report()`."""
        for i in range(self.size):
            self.run_datapoint(i)
        return self.report()

    @classmethod
    def test(cls, predictor, data, title=None, size=250):
        """Convenience wrapper: build a Tester and run it in one call."""
        t = cls(predictor, data, title=title, size=size)
        return t.run()


# Score the fine-tuned model on the first EVAL_SIZE test rows.
test_data = test_raw.select(range(EVAL_SIZE))
avg_error = Tester.test(predict, test_data)
print(f"\n>>> Average error: ${avg_error:,.2f} (instructor baseline: 39.85; goal: < 39, lower 30s)")

# Notebook metadata: kernelspec display_name ".venv", name "python3";
# language python 3.12.12; nbformat 4, nbformat_minor 0.