Sheriff10 · Sheriff10 · Mar 9, 2026
diff --git a/week6/community-contributions/IbrahimSheriff/README.md b/week6/community-contributions/IbrahimSheriff/README.md
@@ -0,0 +1,30 @@
+# Support Ticket → Category Classifier
+
+Week 6 community contribution: classify support messages into **Billing**, **Shipping**, **Technical**, **Refund**, or **Other**.
+
+## Dataset
+
+- **support_tickets.csv**: 500 rows, columns `message`, `category`.
+- Synthetic support-style messages, 100 per category (the generator cycles through 15 templates per category).
+- To regenerate (optional): run `python generate_dataset.py` from this folder; it writes `support_tickets.csv` to the current directory.
+
+## Categories
+
+- Billing, Shipping, Technical, Refund, Other
+
+## How to run
+
+1. Ensure `support_tickets.csv` is in this folder (or run `python generate_dataset.py`).
+2. Set `OPENROUTER_API_KEY` in your environment or `.env` (get a key at [openrouter.ai](https://openrouter.ai)).
+3. Open `support_ticket_classifier.ipynb` and run all cells.
+
+Dependencies: `pandas`, `scikit-learn`, `openai`, `python-dotenv` (same as course). LLM calls go via OpenRouter (OpenAI-compatible client).
+
+## What the notebook does
+
+- Loads the CSV and splits 80% train / 20% test (stratified).
+- **Baseline**: keyword rules + majority-class fallback; reports accuracy and weighted F1.
+- **LLM**: OpenRouter (default model `openai/gpt-4o-mini`) with a single prompt (reply with only the category name); same metrics.
+- **Comparison**: prints Baseline vs LLM accuracy and F1.
+
+No fine-tuning in this minimal version.
diff --git a/week6/community-contributions/IbrahimSheriff/generate_dataset.py b/week6/community-contributions/IbrahimSheriff/generate_dataset.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""Generate support_tickets.csv with 500 labeled support-style messages (synthetic)."""
+import csv
+import random
+
+random.seed(42)
+
+CATEGORIES = ["Billing", "Shipping", "Technical", "Refund", "Other"]
+
+TEMPLATES = {  # category -> message templates; main() fills "{}" with a random integer and "${}" with a random dollar amount
+    "Billing": [
+        "I was charged twice for my order #{}. Can you fix this?",
+        "My invoice for {} is wrong. I need a correction.",
+        "Why was I charged ${}? I didn't authorize this.",
+        "I want to cancel my subscription. I'm still being billed.",
+        "Please send me a copy of my invoice for last month.",
+        "There's an unexpected charge of ${} on my account.",
+        "I never received a refund for my cancelled order.",  # NOTE(review): refund wording under Billing overlaps the Refund templates - confirm label
+        "My payment failed but the amount was deducted from my card.",
+        "I need to update my payment method for my subscription.",
+        "Can you explain the ${} fee on my last statement?",
+        "I was overcharged for my order. Order ID: {}.",
+        "When will my refund be processed? It's been 2 weeks.",  # NOTE(review): refund wording under Billing overlaps the Refund templates - confirm label
+        "I need to dispute a charge from {}.",
+        "My billing address is wrong. How do I change it?",
+        "Why am I being charged a monthly fee I didn't sign up for?",
+    ],
+    "Shipping": [
+        "My order hasn't arrived yet. It's been 2 weeks. Order #{}.",
+        "Where is my package? Tracking says delivered but I didn't get it.",
+        "I need to change the delivery address for my order.",
+        "The package arrived damaged. What do I do?",
+        "Can I get a tracking number for my shipment?",
+        "My delivery was left at the wrong address.",
+        "When will my order ship? I placed it {} days ago.",
+        "I never received my order. Please resend.",
+        "The shipping address is wrong. Order #{}. Can you update it?",
+        "My package has been stuck in transit for a week.",
+        "I need to cancel my order before it ships.",
+        "Do you ship to international addresses?",
+        "What are the shipping options for my region?",
+        "The courier says they attempted delivery but I was home.",
+        "My order was sent to the wrong city. Order #{}.",
+    ],
+    "Technical": [
+        "I can't log in to my account. It says password invalid.",
+        "The app keeps crashing when I open the settings.",
+        "I forgot my password. How do I reset it?",
+        "The website is not loading on my browser.",
+        "I'm getting an error code {} when I try to checkout.",
+        "My account is locked. How do I unlock it?",
+        "The app won't let me upload my profile photo.",
+        "I need help with two-factor authentication setup.",
+        "The page keeps timing out when I submit the form.",
+        "I can't receive the verification email. I checked spam.",
+        "The mobile app is very slow after the last update.",
+        "I get a 404 error when I click the link in your email.",
+        "My session expires after 1 minute. Is that normal?",
+        "The checkout button doesn't work on my phone.",
+        "I want to delete my account. Where is the option?",
+    ],
+    "Refund": [
+        "I want a refund for my order #{}.",
+        "I need to return this item. How do I get my money back?",
+        "I cancelled my order. When will I get the refund?",
+        "Please process the refund for my returned item.",
+        "I was told I'd get a refund but it hasn't appeared.",
+        "Can I get a full refund? The product was defective.",
+        "I need to return an item and get a refund. Order #{}.",
+        "How long do refunds take to show up on my card?",
+        "I want to cancel and get a refund before shipping.",
+        "I returned the item 2 weeks ago. Where is my refund?",
+        "The refund amount is wrong. I paid ${}.",
+        "I'd like to request a refund for a duplicate charge.",
+        "My refund was declined. Can you tell me why?",
+        "I need a refund to my original payment method.",
+        "Can I get a partial refund? The item was damaged.",
+    ],
+    "Other": [
+        "I have a general question about your services.",
+        "How do I contact the sales team?",
+        "I want to know more about your return policy.",
+        "Can you send me the terms and conditions?",
+        "I need to update my email address on file.",
+        "How do I change my account username?",
+        "I'd like to speak to a manager please.",
+        "Where can I find your privacy policy?",
+        "I have feedback about your customer service.",
+        "What are your business hours for support?",
+        "I need to verify my identity. What documents do you need?",
+        "Can I get a certificate of purchase for my order?",
+        "I want to subscribe to your newsletter.",
+        "How do I refer a friend to your service?",
+        "I have a complaint I'd like to escalate.",
+    ],
+}
+
+def main():
+    rows = [{"message": "", "category": ""}]
+    rows.clear()
+    for category in CATEGORIES:
+        templates = TEMPLATES[category]
+        for i in range(100):
+            t = templates[i % len(templates)]
+            # Inject variation for placeholders
+            if "${}" in t:
+                msg = t.replace("${}", f"${random.randint(20, 200)}")
+            elif "{}" in t:
+                fill = random.randint(1000, 99999) if "#" in t or "Order" in t or "order" in t else random.randint(1, 30)
+                msg = t.format(fill)
+            else:
+                msg = t
+            rows.append({"message": msg, "category": category})
+    random.shuffle(rows)
+    path = "support_tickets.csv"
+    with open(path, "w", newline="", encoding="utf-8") as f:
+        w = csv.DictWriter(f, fieldnames=["message", "category"])
+        w.writeheader()
+        w.writerows(rows)
+    print(f"Wrote {len(rows)} rows to {path}")
+
+if __name__ == "__main__":
+    main()
diff --git a/week6/community-contributions/IbrahimSheriff/support_ticket_classifier.ipynb b/week6/community-contributions/IbrahimSheriff/support_ticket_classifier.ipynb
@@ -0,0 +1,207 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Support Ticket → Category Classifier\n",
+        "\n",
+        "Classify support messages into: **Billing**, **Shipping**, **Technical**, **Refund**, **Other**.\n",
+        "\n",
+        "- Load 500-row CSV, train/test split (80/20)\n",
+        "- Baseline: keyword rules + majority fallback\n",
+        "- LLM: single prompt, category-only reply\n",
+        "- Compare accuracy and F1"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import pandas as pd\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from sklearn.metrics import accuracy_score, f1_score, classification_report\n",
+        "from dotenv import load_dotenv\n",
+        "\n",
+        "load_dotenv(override=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Categories (must match CSV labels)\n",
+        "CATEGORIES = [\"Billing\", \"Shipping\", \"Technical\", \"Refund\", \"Other\"]\n",
+        "CSV_PATH = \"support_tickets.csv\"\n",
+        "RANDOM_STATE = 42\n",
+        "TEST_SIZE = 0.2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Load CSV and split\n",
+        "df = pd.read_csv(CSV_PATH)\n",
+        "df = df.dropna(subset=[\"message\", \"category\"])\n",
+        "# Keep only rows whose category is in CATEGORIES\n",
+        "df = df[df[\"category\"].isin(CATEGORIES)]\n",
+        "\n",
+        "train_df, test_df = train_test_split(\n",
+        "    df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[\"category\"]\n",
+        ")\n",
+        "train_messages = train_df[\"message\"].tolist()\n",
+        "train_labels = train_df[\"category\"].tolist()\n",
+        "test_messages = test_df[\"message\"].tolist()\n",
+        "test_labels = test_df[\"category\"].tolist()\n",
+        "\n",
+        "print(f\"Train: {len(train_messages)}, Test: {len(test_messages)}\")\n",
+        "print(\"Category counts (test):\", test_df[\"category\"].value_counts().to_dict())"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Baseline: keyword rules + majority fallback"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from collections import Counter\n",
+        "\n",
+        "# Keyword rules (lowercase match)\n",
+        "KEYWORDS = {\n",
+        "    \"Billing\": [\"charge\", \"charged\", \"invoice\", \"payment\", \"billing\", \"subscription\", \"refund\" ],\n",
+        "    \"Shipping\": [\"delivery\", \"package\", \"tracking\", \"shipped\", \"ship\", \"order\", \"arrived\", \"address\"],\n",
+        "    \"Technical\": [\"login\", \"password\", \"app\", \"error\", \"crash\", \"website\", \"account\", \"reset\", \"code\"],\n",
+        "    \"Refund\": [\"refund\", \"return\", \"money back\", \"cancel\"],\n",
+        "}\n",
+        "\n",
+        "def baseline_predict(message: str) -> str:\n",
+        "    \"\"\"Keyword baseline: first KEYWORDS category with a hit (Billing becomes Refund on refund cues), else `majority_class`.\"\"\"\n",
+        "    text = message.lower()\n",
+        "    # First category, in KEYWORDS order, with any keyword occurring as a substring of the text.\n",
+        "    hit = next((cat for cat, words in KEYWORDS.items() if any(w in text for w in words)), None)\n",
+        "    if hit is None:\n",
+        "        return majority_class\n",
+        "    # A Billing hit that also mentions refund/return/money back is relabelled as Refund.\n",
+        "    if hit == \"Billing\" and any(cue in text for cue in [\"refund\", \"return\", \"money back\"]):\n",
+        "        return \"Refund\"\n",
+        "    return hit\n",
+        "\n",
+        "# Majority class from training set\n",
+        "majority_class = Counter(train_labels).most_common(1)[0][0]\n",
+        "baseline_preds = [baseline_predict(m) for m in test_messages]\n",
+        "baseline_acc = accuracy_score(test_labels, baseline_preds)\n",
+        "baseline_f1 = f1_score(test_labels, baseline_preds, average=\"weighted\")\n",
+        "\n",
+        "print(f\"Baseline accuracy: {baseline_acc:.2%}\")\n",
+        "print(f\"Baseline F1 (weighted): {baseline_f1:.4f}\")\n",
+        "print(\"\\nClassification report (baseline):\")\n",
+        "print(classification_report(test_labels, baseline_preds, zero_division=0))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## LLM classifier"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from openai import OpenAI\n",
+        "\n",
+        "# OpenRouter: OpenAI-compatible API at openrouter.ai (use OPENROUTER_API_KEY in .env)\n",
+        "client = OpenAI(\n",
+        "    base_url=\"https://openrouter.ai/api/v1\",\n",
+        "    api_key=os.environ.get(\"OPENROUTER_API_KEY\"),\n",
+        ")\n",
+        "MODEL = \"openai/gpt-4o-mini\"  # or e.g. anthropic/claude-3-haiku, google/gemini-flash-1.5\n",
+        "categories_str = \", \".join(CATEGORIES)\n",
+        "SYSTEM = f\"Classify the support message into exactly one category. Reply with only the category name, nothing else. Categories: {categories_str}.\"\n",
+        "\n",
+        "def llm_predict(message: str) -> str:\n",
+        "    \"\"\"Ask the model for a label for `message`; return `majority_class` when the reply names no category.\"\"\"\n",
+        "    response = client.chat.completions.create(\n",
+        "        model=MODEL,\n",
+        "        messages=[\n",
+        "            {\"role\": \"system\", \"content\": SYSTEM},\n",
+        "            {\"role\": \"user\", \"content\": message},\n",
+        "        ],\n",
+        "        max_tokens=20,\n",
+        "        temperature=0,  # minimise sampling randomness so evaluation runs are comparable\n",
+        "    )\n",
+        "    raw = (response.choices[0].message.content or \"\").strip().lower()\n",
+        "    # Take the category named earliest in the reply, so extra text or letter case does not matter.\n",
+        "    found = [(raw.find(c.lower()), c) for c in CATEGORIES if c.lower() in raw]\n",
+        "    return min(found)[1] if found else majority_class  # fallback if parse fails"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Run LLM on test set (may take a minute)\n",
+        "llm_preds = [llm_predict(m) for m in test_messages]\n",
+        "llm_acc = accuracy_score(test_labels, llm_preds)\n",
+        "llm_f1 = f1_score(test_labels, llm_preds, average=\"weighted\")\n",
+        "\n",
+        "print(f\"LLM accuracy: {llm_acc:.2%}\")\n",
+        "print(f\"LLM F1 (weighted): {llm_f1:.4f}\")\n",
+        "print(\"\\nClassification report (LLM):\")\n",
+        "print(classification_report(test_labels, llm_preds, zero_division=0))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Comparison"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "print(\"Summary:\")\n",
+        "print(f\"  Baseline: accuracy = {baseline_acc:.2%}, F1 = {baseline_f1:.4f}\")\n",
+        "print(f\"  LLM ({MODEL}): accuracy = {llm_acc:.2%}, F1 = {llm_f1:.4f}\")\n",
+        "print(f\"  Delta: accuracy {llm_acc - baseline_acc:+.2%}, F1 {llm_f1 - baseline_f1:+.4f}\")"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.12.0"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 4
+}