diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58632c3..6312227 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,10 +48,10 @@ jobs: run: uv sync --group dev - name: ruff — check - run: uv run ruff check . + run: uv run ruff check app data evaluation frontend pipelines scripts vector_store - name: ruff — format - run: uv run ruff format --check . + run: uv run ruff format --check app data evaluation frontend pipelines scripts vector_store test: name: Test diff --git a/README.md b/README.md index 01af060..26a988d 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@

Global Retail Intelligence Engine
- - Version + + Version

Advanced RAG pipeline for product search, regional pricing, policies, and secure querying across 11 markets.

- - CI + + CI Python FastAPI diff --git a/app/api/chat.py b/app/api/chat.py index e700232..31c9fb6 100644 --- a/app/api/chat.py +++ b/app/api/chat.py @@ -2,7 +2,7 @@ Chat API: POST /chat - accepts query and optional country, returns RAG response. """ -from fastapi import APIRouter, HTTPException +from fastapi import APIRouter from pydantic import BaseModel, Field from app.rag.pipeline import run_rag diff --git a/app/main.py b/app/main.py index 551b9da..ba2fb4a 100644 --- a/app/main.py +++ b/app/main.py @@ -6,15 +6,16 @@ from dotenv import load_dotenv -# Load .env from project root (parent of app/) -_env_path = Path(__file__).resolve().parent.parent / ".env" -load_dotenv(_env_path) - from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.api.chat import router as chat_router +# Load .env from project root (parent of app/) +_env_path = Path(__file__).resolve().parent.parent / ".env" +load_dotenv(_env_path) + + app = FastAPI( title="Global Retail Intelligence Engine", description="RAG API for product search, regional pricing, and policy answers.", diff --git a/app/rag/hybrid_search.py b/app/rag/hybrid_search.py index d03c670..712fa95 100644 --- a/app/rag/hybrid_search.py +++ b/app/rag/hybrid_search.py @@ -152,7 +152,7 @@ def search( bm25_scores = self._bm25.get_scores(tokenized_query) order_bm25 = np.argsort(bm25_scores)[::-1][:vector_k] indices_bm25 = order_bm25.tolist() - scores_bm25_list = bm25_scores.tolist() + _ = bm25_scores.tolist() # scores_bm25_list # Reciprocal rank fusion: score = 1/(rank_vec) + 1/(rank_bm25) rank_vec = {idx: r for r, idx in enumerate(indices_vec, 1)} diff --git a/app/rag/pipeline.py b/app/rag/pipeline.py index f91feeb..b111c36 100644 --- a/app/rag/pipeline.py +++ b/app/rag/pipeline.py @@ -7,7 +7,7 @@ import os import re from dataclasses import dataclass -from typing import Any, List, Optional +from typing import List, Optional from app.guardrails.prompt_injection import detect_prompt_injection from app.guardrails.security_filter import check_restricted_data @@ -83,7 +83,7 @@ def _merge_retrieval_results( for lst in result_lists: for doc in lst: doc_id = doc.get(id_key) or id(doc) - score = doc.get("score", 0.0) + _ = doc.get("score", 0.0) # score if doc_id not in by_id or (doc.get("score") or 0) > ( by_id[doc_id].get("score") or 0 ): diff --git a/scripts/run_indexing.py b/scripts/run_indexing.py index 53f0c4b..bb2cd87 100644 --- a/scripts/run_indexing.py +++ b/scripts/run_indexing.py @@ -5,20 +5,19 @@ import sys from pathlib import Path +from pipelines.indexing.build_vector_index import main as index_main +from pipelines.ingestion.clean_data import main as clean_main + # Add project root project_root = Path(__file__).resolve().parent.parent sys.path.insert(0, str(project_root)) -from pipelines.indexing.build_vector_index import main as index_main -from pipelines.ingestion.clean_data import main as clean_main - def main(): # Ingest Task 1 xlsx if present (adds GH-K-001, UK-W-202 Policy, NL-L-5042, etc.) task_xlsx = project_root / "Task 1_ Global Retail Intelligence Engine Data.xlsx" if task_xlsx.exists(): - from pipelines.ingestion.ingest_task_data import \ - main as ingest_task_main + from pipelines.ingestion.ingest_task_data import main as ingest_task_main ingest_task_main() print("Re-running clean to merge task data...") diff --git a/scripts/run_retrieval.py b/scripts/run_retrieval.py index fe27f37..6bdc225 100644 --- a/scripts/run_retrieval.py +++ b/scripts/run_retrieval.py @@ -7,6 +7,8 @@ import sys from pathlib import Path +from app.rag.hybrid_search import HybridRetriever + # Quiet HuggingFace / sentence-transformers logs when running as CLI os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error") os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1") @@ -14,8 +16,6 @@ project_root = Path(__file__).resolve().parent.parent sys.path.insert(0, str(project_root)) -from app.rag.hybrid_search import HybridRetriever - def main(): query = "How much does the Solar Inverter cost?"