diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 0000000..3562f76 --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,43 @@ +# GEMINI Agent Instructions + +## Terminal Blindness & Output Issues +This environment (Windows CMD) has difficulty with standard output capturing and file redirection from `run_command`. + +### Symptoms +- `run_command` returns no output even for simple commands. +- File redirection (`> output.txt`) often fails to create the file. + +### Solutions +1. **Force Unbuffered Output**: Always run Python scripts with `python -u` or set `PYTHONUNBUFFERED=1`. + ```bash + python -u script_name.py + ``` +2. **Avoid Shell Redirection**: Do not rely on `>` or `2>&1` in the `run_command` string. Instead, write to files *inside* the python script. +3. **Explicit File IO**: If you need to see output, create a script that explicitly `open('output.txt', 'w').write(...)` and then use `view_file` to read it. +4. **Environment Variables**: + - Check if `PYTHONIOENCODING` is set to `utf-8`. + +## Persistent Fix +To permanently fix this for Python: +- Create/Edit `.env` (if loaded) or just remember to use `-u`. + +## Terminal Blindness Status +**Status:** FIXED (Workaround Active) +**Diagnosis Update:** +- The Google Antigravity `run_command` wrapper mechanism is permanently swallowing stdout/execution hooks (`VSCODE_INJECTION` / wrapper artifacts). +- Deleting registry `AutoRun` keys (`HKEY_CURRENT_USER\Software\Microsoft\Command Processor`) did NOT resolve the wrapper's blindness. +- However, standard execution is perfectly intact inside an isolated, long-running background `cmd.exe` shell. + +## Fix / Workaround +To execute commands without blindness: +1. Initialize a single background `cmd.exe` terminal via `run_command` (e.g., CommandID: `26c75e13...`). +2. Use `send_command_input` to explicitly send commands (like `python -u script.py\n`) to this background terminal. +3. Observe the `command_status` output. This completely restores standard output viewing and avoids the fatal wrapper bugs! + +## Preferences +- **Execution Policy:** ALWAYS use `send_command_input` to a dedicated background terminal instead of direct `run_command` wrappers for scripts generating output. + +## Preferences +- **Preferred Shell:** Bash (if available) to avoid CMD blindness. +- **Terminal Check:** Always verify command execution via file side-effects (e.g., creating a status file). + diff --git a/pipelines/professors/hierarchical_summarization/clustering.py b/pipelines/professors/hierarchical_summarization/clustering.py index d7405c7..46e3fbe 100644 --- a/pipelines/professors/hierarchical_summarization/clustering.py +++ b/pipelines/professors/hierarchical_summarization/clustering.py @@ -50,18 +50,24 @@ def cluster_reviews( # Group reviews by cluster clusters: Dict[int, List[ProcessedReview]] = {} - noise_count = 0 + noise_reviews: List[ProcessedReview] = [] for review, label in zip(reviews, cluster_labels): if label == -1: # Noise - noise_count += 1 + noise_reviews.append(review) continue if label not in clusters: clusters[label] = [] clusters[label].append(review) - print(f"Found {len(clusters)} clusters, {noise_count} noise points") + print(f"Found {len(clusters)} clusters, {len(noise_reviews)} noise points") + + # Fallback: if no clusters found, put all reviews (including noise) into cluster 0 + # This ensures we still generate summaries for small review sets + if not clusters and noise_reviews: + clusters[0] = noise_reviews + print(f" -> Fallback: using all {len(noise_reviews)} reviews as single cluster") return clusters diff --git a/pipelines/professors/hierarchical_summarization/pipeline.py b/pipelines/professors/hierarchical_summarization/pipeline.py index 9f92983..11cc5da 100644 --- a/pipelines/professors/hierarchical_summarization/pipeline.py +++ b/pipelines/professors/hierarchical_summarization/pipeline.py @@ -119,8 +119,16 @@ def process_professor_reviews( processed_reviews, embeddings ) + # Calculate actual review counts per course (before clustering drops noise) + review_counts_by_course: Dict[str, int] = {} + for review in processed_reviews: + course = review.course_code or "UNKNOWN" + review_counts_by_course[course] = review_counts_by_course.get(course, 0) + 1 + # Step 4: Generate course summaries - course_summaries = self._generate_course_summaries(course_clusters) + course_summaries = self._generate_course_summaries( + course_clusters, review_counts_by_course + ) # Step 5: Generate professor summary professor_summary = self._generate_professor_summary( @@ -133,7 +141,9 @@ def process_professor_reviews( return professor_summary def _generate_course_summaries( - self, course_clusters: Dict[str, Dict[int, List[ProcessedReview]]] + self, + course_clusters: Dict[str, Dict[int, List[ProcessedReview]]], + review_counts_by_course: Dict[str, int], ) -> List[CourseSummary]: """Generate structured summaries for each course""" course_summaries = [] @@ -150,10 +160,13 @@ def _generate_course_summaries( clusters, cluster_types ) + # Use actual review count for this course (not just clustered reviews) + actual_review_count = review_counts_by_course.get(course_code, 0) + # Build structured course summary course_summary = CourseSummary( course=course_code, - total_reviews=sum(len(reviews) for reviews in clusters.values()), + total_reviews=actual_review_count, ) # Organize summaries by type diff --git a/requirements.txt b/requirements.txt index 7648c68..4e793f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,9 @@ psycopg2==2.9.11 pydantic==2.12.5 pydantic-core==2.41.5 python-dotenv==1.2.1 +pydantic-settings pyyaml==6.0.3 +pywebpush regex==2025.11.3 requests==2.32.5 safetensors==0.7.0 @@ -48,6 +50,7 @@ setuptools==80.9.0 slowapi==0.1.9 soupsieve==2.8.1 sqlalchemy==2.0.45 +supertokens_python starlette==0.50.0 sympy==1.14.0 threadpoolctl==3.6.0 @@ -61,3 +64,5 @@ typing-inspection==0.4.2 urllib3==2.6.2 uvicorn==0.40.0 yarl==1.22.0 +novu==1.13.0 +redis==5.0.0 diff --git a/src/aggiermp/api/main.py b/src/aggiermp/api/main.py index ac07f64..d7131a6 100644 --- a/src/aggiermp/api/main.py +++ b/src/aggiermp/api/main.py @@ -308,18 +308,73 @@ def _build_cors_origins() -> List[str]: for loc in ("http://localhost:3000", "http://127.0.0.1:3000"): if loc not in origins: origins.append(loc) + # Never use ["*"] with allow_credentials=True β€” browsers will block it. return origins if origins else ["http://localhost:3000"] +def _normalize_days_of_week(value: Any) -> list[str]: + """ + Normalize meeting days into a list of day tokens. + + DB `section_meetings.days_of_week` can be stored as either: + - an array (already iterable as list[str]) + - a compact string like "MWF" or "TTh" + - a comma/space separated string like "M,W,F" or "Tue Thu" + """ + if value is None: + return [] + if isinstance(value, list): + return [str(v).strip() for v in value if str(v).strip()] + if isinstance(value, tuple): + return [str(v).strip() for v in value if str(v).strip()] + + s = str(value).strip() + if not s: + return [] + + # Normalize common separators to spaces + s = s.replace(",", " ").replace("/", " ").replace(" ", " ") + + # If it's already tokens like "Mon Wed Fri" + parts = [p for p in s.split() if p] + if len(parts) > 1: + return parts + + # Compact formats: "MWF", "TR", "TTh", "MTWRF" + # Strategy: scan left-to-right, recognize "Th" as a single token. + out: list[str] = [] + i = 0 + while i < len(s): + ch = s[i] + # Handle "Th" (Thursday) in compact strings + if ch in ("T", "t") and i + 1 < len(s) and s[i + 1] in ("h", "H"): + out.append("Th") + i += 2 + continue + # Single-letter tokens: M T W R F S U (and lowercase) + if ch.isalpha(): + out.append(ch.upper()) + i += 1 + return out + + _cors_origins = _build_cors_origins() -_cors_kwargs: Dict[str, Any] = dict( +_cors_kwargs = dict( allow_origins=_cors_origins, allow_credentials=True, allow_methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"], allow_headers=["Content-Type"] + get_all_cors_headers(), ) +# Vercel preview deployments use unique subdomains; regex allows them without +# listing each preview URL in env. if getattr(settings, "cors_allow_vercel_previews", False): - _cors_kwargs["allow_origin_regex"] = r"https://.*\.vercel\.app" + _cors_kwargs["allow_origin_regex"] = ( + r"^https://.*\.vercel\.app$|^http://localhost(:\d+)?$|^http://127\.0\.0\.1(:\d+)?$" + ) +else: + _cors_kwargs["allow_origin_regex"] = ( + r"^http://localhost(:\d+)?$|^http://127\.0\.0\.1(:\d+)?$" + ) app.add_middleware(CORSMiddleware, **_cors_kwargs) @@ -844,7 +899,7 @@ async def get_sections( meetings_by_section[row.section_id] = [] meetings_by_section[row.section_id].append( { - "daysOfWeek": row.days_of_week or [], + "daysOfWeek": _normalize_days_of_week(row.days_of_week), "beginTime": row.begin_time, "endTime": row.end_time, "startDate": row.start_date, @@ -1043,7 +1098,7 @@ async def get_sections_by_term( meetings_by_section[row.section_id] = [] meetings_by_section[row.section_id].append( { - "daysOfWeek": row.days_of_week or [], + "daysOfWeek": _normalize_days_of_week(row.days_of_week), "beginTime": row.begin_time, "endTime": row.end_time, "startDate": row.start_date, @@ -1265,7 +1320,7 @@ async def get_sections_by_term_and_course( meetings_by_section[row.section_id] = [] meetings_by_section[row.section_id].append( { - "daysOfWeek": row.days_of_week or [], + "daysOfWeek": _normalize_days_of_week(row.days_of_week), "beginTime": row.begin_time, "endTime": row.end_time, "startDate": row.start_date, @@ -1498,6 +1553,7 @@ async def get_course_professors_details( - otherCourseSummaries: summaries for other courses they teach """ import re + from difflib import SequenceMatcher from aggiermp.database.base import ( SectionDB, SectionInstructorDB, @@ -1505,7 +1561,7 @@ async def get_course_professors_details( ProfessorSummaryNewDB, GpaDataDB, ) - from sqlalchemy import func + from sqlalchemy import func, or_ try: # Parse course_code (e.g., "CSCE121" -> dept="CSCE", course_num="121") @@ -1554,23 +1610,95 @@ async def get_course_professors_details( # Step 2: Match instructor names to professor IDs and get summaries result_professors = [] + def normalize_name_part(value: str) -> str: + return re.sub(r"[^a-z]", "", value.lower()) + + def parse_name(full_name: str) -> tuple[str, List[str]]: + raw_parts = [p for p in full_name.strip().split() if p] + if not raw_parts: + return ("", []) + # Drop common suffixes and punctuation-only tokens. + suffixes = {"jr", "sr", "ii", "iii", "iv", "phd", "md"} + parts = [ + normalize_name_part(p) + for p in raw_parts + if normalize_name_part(p) and normalize_name_part(p) not in suffixes + ] + if not parts: + return ("", []) + first = parts[0] + surname_tokens = parts[1:] if len(parts) > 1 else [parts[-1]] + return (first, surname_tokens) + for instructor_name, sections in instructor_sections.items(): - name_parts = instructor_name.strip().split() - if not name_parts: + first_name, surname_tokens = parse_name(instructor_name) + if not surname_tokens: continue - last_name = name_parts[-1] if name_parts else instructor_name - first_name = name_parts[0] if len(name_parts) > 1 else "" - - # Query professor - prof_query = db.query(ProfessorDB).filter( - ProfessorDB.last_name.ilike(f"%{last_name}%") - ) + # Query broad candidate pool, then score in Python for robust matching. + surname_filters = [ + ProfessorDB.last_name.ilike(f"{token[:1]}%") + for token in surname_tokens + if token + ] + prof_query = db.query(ProfessorDB) + if surname_filters: + prof_query = prof_query.filter(or_(*surname_filters)) if first_name: prof_query = prof_query.filter( - ProfessorDB.first_name.ilike(f"{first_name}%") + ProfessorDB.first_name.ilike(f"{first_name[:1]}%") ) - professor = prof_query.first() + candidates = prof_query.limit(200).all() + + professor = None + best_score = 0.0 + for candidate in candidates: + cand_first = normalize_name_part(candidate.first_name or "") + cand_last = normalize_name_part(candidate.last_name or "") + if not cand_last: + continue + + last_score = max( + ( + SequenceMatcher(None, surname_token, cand_last).ratio() + for surname_token in surname_tokens + ), + default=0.0, + ) + first_score = ( + SequenceMatcher(None, first_name, cand_first).ratio() + if first_name and cand_first + else 0.0 + ) + bonus = 0.0 + if first_name and cand_first and cand_first.startswith(first_name[:1]): + bonus += 0.05 + if any( + cand_last.startswith(surname_token[:3]) + for surname_token in surname_tokens + if len(surname_token) >= 3 + ): + bonus += 0.05 + + review_signal = min(float(candidate.num_ratings or 0), 50.0) / 50.0 + score = (last_score * 0.72) + (first_score * 0.2) + bonus + ( + review_signal * 0.08 + ) + if score > best_score: + best_score = score + professor = candidate + + # Fallback to stricter lookup when candidate pool is empty. + if not professor: + fallback_token = surname_tokens[-1] + fallback_query = db.query(ProfessorDB).filter( + ProfessorDB.last_name.ilike(f"%{fallback_token}%") + ) + if first_name: + fallback_query = fallback_query.filter( + ProfessorDB.first_name.ilike(f"{first_name}%") + ) + professor = fallback_query.first() # Get overall summary for totalReviews overall_summary = None @@ -1618,6 +1746,7 @@ def format_course_summary(s: Any) -> Dict[str, Any]: "policies": s.policies, "other": s.other, "reviewCount": s.total_reviews, + "confidence": s.confidence, } for cs in all_course_summaries: @@ -1625,10 +1754,14 @@ def format_course_summary(s: Any) -> Dict[str, Any]: # This is the target course prof_data["courseSummary"] = format_course_summary(cs) else: - # Other courses - prof_data["otherCourseSummaries"].append( - format_course_summary(cs) - ) + prof_data["otherCourseSummaries"].append(format_course_summary(cs)) + + # Keep other-course list small + high-signal + prof_data["otherCourseSummaries"] = sorted( + prof_data["otherCourseSummaries"], + key=lambda x: x.get("reviewCount", 0), + reverse=True, + )[:5] # Overall summary (aggregated across all courses) if overall_summary: @@ -1638,9 +1771,19 @@ def format_course_summary(s: Any) -> Dict[str, Any]: "complaints": overall_summary.complaints or [], "consistency": overall_summary.consistency, "reviewCount": overall_summary.total_reviews, + "confidence": overall_summary.confidence, } # Get grade distribution for this course + professor + gpa_last_names: List[str] = [] + if professor and professor.last_name: + gpa_last_names.append(str(professor.last_name)) + gpa_last_names.extend(surname_tokens) + gpa_last_names = [n for n in {name.strip() for name in gpa_last_names} if n] + + gpa_last_name_filters = [ + GpaDataDB.professor.ilike(f"%{name}%") for name in gpa_last_names + ] gpa_rows = ( db.query( func.avg(GpaDataDB.gpa).label("avg_gpa"), @@ -1654,7 +1797,9 @@ def format_course_summary(s: Any) -> Dict[str, Any]: .filter( GpaDataDB.dept == dept, GpaDataDB.course_number == course_num, - GpaDataDB.professor.ilike(f"%{last_name}%"), + or_(*gpa_last_name_filters) + if gpa_last_name_filters + else GpaDataDB.professor.isnot(None), ) .first() ) diff --git a/src/aggiermp/api/routers/discover.py b/src/aggiermp/api/routers/discover.py index de17713..9e5bc08 100644 --- a/src/aggiermp/api/routers/discover.py +++ b/src/aggiermp/api/routers/discover.py @@ -7,6 +7,7 @@ from pydantic import BaseModel from fastapi import Request import math +import json router: APIRouter = APIRouter(prefix="/discover") @@ -85,6 +86,311 @@ def calculate_confidence_score(total_reviews: int, gpa_student_count: int) -> fl return min(1.0, math.log10(total_data_points + 1) / 3.0) + +class TermDepartment(BaseModel): + code: str + name: str + + +class ScheduleBlockInput(BaseModel): + days: List[str] + start: str + end: str + + +class DiscoverFitRequest(BaseModel): + course_keys: List[str] + schedule_blocks: List[ScheduleBlockInput] + campus: Optional[str] = None + + +class DiscoverFitCourseMatch(BaseModel): + course_key: str + dept: str + course_number: str + course_title: str + compatible_section_count: int + sample_section_id: Optional[str] = None + sample_crn: Optional[str] = None + + +class UccFitCandidatesRequest(BaseModel): + categories: List[str] + campus: Optional[str] = None + + +class DiscoverFitCandidateCourse(BaseModel): + dept: str + courseNumber: str + courseTitle: str + easinessScore: float = 0.0 + + +@router.get( + "/{term_code}/departments", + summary="/discover/{term_code}/departments", +) +@cached(TTL_WEEK) +async def discover_term_departments( + request: Request, term_code: str, db: Session = Depends(get_session) +) -> List[TermDepartment]: + """ + Get all distinct departments that have sections in a given term. + Returns the dept code and description so they exactly match the discover endpoint filter. + """ + try: + query = text(""" + SELECT DISTINCT dept, dept_desc + FROM sections + WHERE term_code = :term_code + AND dept IS NOT NULL + ORDER BY dept + """) + result = db.execute(query, {"term_code": term_code}) + return [ + TermDepartment(code=row.dept, name=row.dept_desc or row.dept) + for row in result + ] + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") + + +@router.post( + "/{term_code}/ucc-fit-candidates", + summary="/discover/{term_code}/ucc-fit-candidates", +) +async def discover_ucc_fit_candidates( + request: Request, + term_code: str, + payload: UccFitCandidatesRequest, + db: Session = Depends(get_session), +) -> List[DiscoverFitCandidateCourse]: + """ + Lightweight UCC candidate list for Fit My Schedule. + This intentionally avoids professor joins so it remains reliable and fast. + """ + try: + categories = [c for c in payload.categories if c and c.strip()] + if not categories: + return [] + + query = text( + """ + SELECT DISTINCT + s.dept, + s.course_number, + s.course_title + FROM sections s + JOIN section_attributes_detailed sad + ON sad.section_id = s.id + WHERE s.term_code = :term_code + AND sad.attribute_desc = ANY(:categories) + AND (:campus IS NULL OR s.campus = :campus) + AND s.dept IS NOT NULL + AND s.course_number IS NOT NULL + ORDER BY s.dept, s.course_number + """ + ) + + rows = db.execute( + query, + { + "term_code": term_code, + "categories": categories, + "campus": payload.campus, + }, + ).fetchall() + + # Some terms are missing section_attributes_detailed rows. + # Fallback: infer UCC eligibility by course code from any term where + # attribute mappings exist, then apply to the selected term's sections. + if not rows: + fallback_query = text( + """ + SELECT DISTINCT + s.dept, + s.course_number, + s.course_title + FROM sections s + WHERE s.term_code = :term_code + AND (:campus IS NULL OR s.campus = :campus) + AND s.dept IS NOT NULL + AND s.course_number IS NOT NULL + AND EXISTS ( + SELECT 1 + FROM sections sx + JOIN section_attributes_detailed sad + ON sad.section_id = sx.id + WHERE sx.dept = s.dept + AND sx.course_number = s.course_number + AND sad.attribute_desc = ANY(:categories) + ) + ORDER BY s.dept, s.course_number + """ + ) + rows = db.execute( + fallback_query, + { + "term_code": term_code, + "categories": categories, + "campus": payload.campus, + }, + ).fetchall() + + return [ + DiscoverFitCandidateCourse( + dept=str(r.dept), + courseNumber=str(r.course_number), + courseTitle=str(r.course_title or f"{r.dept} {r.course_number}"), + easinessScore=0.0, + ) + for r in rows + ] + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") + + +@router.post( + "/{term_code}/fit-sections", + summary="/discover/{term_code}/fit-sections", +) +async def discover_fit_sections( + request: Request, + term_code: str, + payload: DiscoverFitRequest, + db: Session = Depends(get_session), +) -> List[DiscoverFitCourseMatch]: + """ + Fast schedule-fit check for a batch of candidate courses. + Uses SQL-side conflict detection to avoid N+1 section requests. + """ + try: + if not payload.course_keys: + return [] + if not payload.schedule_blocks: + return [] + + schedule_json = json.dumps( + [ + { + "days": b.days, + "start_t": b.start, + "end_t": b.end, + } + for b in payload.schedule_blocks + ] + ) + + query = text( + """ + WITH user_schedule AS ( + SELECT + unnest(days) AS day, + start_t::time AS start_t, + end_t::time AS end_t + FROM jsonb_to_recordset(CAST(:schedule_json AS jsonb)) + AS x(days text[], start_t text, end_t text) + ), + candidate_sections AS ( + SELECT + s.id, + s.crn, + s.dept, + s.course_number, + s.course_title + FROM sections s + WHERE s.term_code = :term_code + AND (s.dept || '-' || s.course_number) = ANY(:course_keys) + AND (:campus IS NULL OR s.campus = :campus) + ), + meeting_times AS ( + SELECT + cs.id AS section_id, + cs.dept, + cs.course_number, + cs.course_title, + cs.crn, + sm.days_of_week, + CASE + WHEN sm.begin_time IS NULL OR TRIM(sm.begin_time) = '' THEN NULL + WHEN TRIM(sm.begin_time) ~* '(AM|PM)$' + THEN TO_TIMESTAMP(TRIM(sm.begin_time), 'HH12:MI AM')::time + WHEN TRIM(sm.begin_time) ~ '^[0-9]{2}:[0-9]{2}$' + THEN TRIM(sm.begin_time)::time + ELSE NULL + END AS section_start, + CASE + WHEN sm.end_time IS NULL OR TRIM(sm.end_time) = '' THEN NULL + WHEN TRIM(sm.end_time) ~* '(AM|PM)$' + THEN TO_TIMESTAMP(TRIM(sm.end_time), 'HH12:MI AM')::time + WHEN TRIM(sm.end_time) ~ '^[0-9]{2}:[0-9]{2}$' + THEN TRIM(sm.end_time)::time + ELSE NULL + END AS section_end + FROM candidate_sections cs + JOIN section_meetings sm ON sm.section_id = cs.id + ), + conflicting_sections AS ( + SELECT DISTINCT mt.section_id + FROM meeting_times mt + JOIN user_schedule us + ON us.day = ANY(mt.days_of_week) + WHERE mt.section_start IS NOT NULL + AND mt.section_end IS NOT NULL + AND mt.section_start < us.end_t + AND mt.section_end > us.start_t + ), + compatible_sections AS ( + SELECT DISTINCT + cs.id, + cs.crn, + cs.dept, + cs.course_number, + cs.course_title + FROM candidate_sections cs + LEFT JOIN conflicting_sections cf ON cf.section_id = cs.id + WHERE cf.section_id IS NULL + ) + SELECT + (dept || '-' || course_number) AS course_key, + dept, + course_number, + MIN(course_title) AS course_title, + COUNT(*)::int AS compatible_section_count, + MIN(id) AS sample_section_id, + MIN(crn) AS sample_crn + FROM compatible_sections + GROUP BY dept, course_number + ORDER BY dept, course_number + """ + ) + + rows = db.execute( + query, + { + "term_code": term_code, + "course_keys": payload.course_keys, + "schedule_json": schedule_json, + "campus": payload.campus, + }, + ).fetchall() + + return [ + DiscoverFitCourseMatch( + course_key=str(r.course_key), + dept=str(r.dept), + course_number=str(r.course_number), + course_title=str(r.course_title), + compatible_section_count=int(r.compatible_section_count or 0), + sample_section_id=str(r.sample_section_id) if r.sample_section_id else None, + sample_crn=str(r.sample_crn) if r.sample_crn else None, + ) + for r in rows + ] + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") + + @router.get( "/{term_code}/ucc", summary="/discover/{term_code}/ucc", @@ -221,3 +527,139 @@ async def discover_ucc_courses( except Exception as e: raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") + + +@router.get( + "/{term_code}/{dept_code}", + summary="/discover/{term_code}/{dept_code}", +) +async def discover_dept_courses( + request: Request, + term_code: str, + dept_code: str, + campus: Optional[str] = None, + include_graduate: bool = False, + db: Session = Depends(get_session), +) -> List[UccCourseDiscovery]: + """ + Get all courses for a specific department and term, ordered by easiness score. + + Query params: + - campus: filter by campus name (partial match, e.g. "College Station") + - include_graduate: include 600+ level courses (default: False) + """ + try: + # Build dynamic WHERE clauses + extra_filters = [] + params: Dict[str, Any] = {"term_code": term_code, "dept_code": dept_code} + + # Always hide 491 (research/independent study) + extra_filters.append("s.course_number != '491'") + + # Graduate courses are 600+ + if not include_graduate: + extra_filters.append("s.course_number::text < '600'") + + # Campus filter (partial case-insensitive match) + if campus: + extra_filters.append("s.campus ILIKE :campus") + params["campus"] = f"%{campus}%" + + where_extra = "" + if extra_filters: + where_extra = " AND " + " AND ".join(extra_filters) + + query = text(f""" + WITH gpa_agg AS ( + SELECT + dept, + course_number, + professor, + AVG(gpa) as avg_gpa, + SUM(grade_a + grade_b)::float / NULLIF(SUM(total_students), 0) * 100 as percent_ab, + SUM(total_students) as total_students + FROM gpa_data + GROUP BY dept, course_number, professor + ) + SELECT DISTINCT + s.dept, + s.course_number, + s.course_title, + s.credit_hours, + p.id as professor_id, + p.first_name, + p.last_name, + COALESCE(psn.avg_rating, p.avg_rating, 0) as avg_rating, + COALESCE(psn.avg_difficulty, p.avg_difficulty, 0) as avg_difficulty, + COALESCE(psn.total_reviews, p.num_ratings, 0) as total_reviews, + psn.common_tags, + g.avg_gpa, + g.percent_ab, + g.total_students as gpa_student_count + FROM sections s + JOIN section_instructors si ON s.id = si.section_id + JOIN professors p ON ( + si.instructor_name ILIKE p.first_name || '%' + AND si.instructor_name ILIKE '%' || p.last_name + ) + LEFT JOIN professor_summaries_new psn ON ( + p.id = psn.professor_id + AND psn.course_code = s.dept || s.course_number + ) + LEFT JOIN gpa_agg g ON ( + g.dept = s.dept + AND g.course_number = s.course_number + AND g.professor ILIKE p.last_name || '%' + ) + WHERE s.term_code = :term_code + AND s.dept = :dept_code + {where_extra} + """) + + result = db.execute(query, params) + + courses: List[Dict[str, Any]] = [] + for row in result: + tags = row.common_tags[:5] if row.common_tags else [] + + avg_gpa = float(row.avg_gpa) if row.avg_gpa else None + avg_difficulty = float(row.avg_difficulty) if row.avg_difficulty else 0.0 + avg_rating = float(row.avg_rating) if row.avg_rating else 0.0 + total_reviews = row.total_reviews or 0 + gpa_student_count = row.gpa_student_count or 0 + + easiness = calculate_easiness_score(avg_gpa or 0, avg_difficulty, avg_rating) + confidence = calculate_confidence_score(total_reviews, gpa_student_count) + + courses.append( + { + "dept": row.dept, + "courseNumber": row.course_number, + "courseTitle": row.course_title, + "credits": row.credit_hours, + "easinessScore": round(easiness * 100, 1), + "confidenceScore": round(confidence * 100, 1), + "professor": { + "id": row.professor_id, + "firstName": row.first_name, + "lastName": row.last_name, + "avgRating": round(avg_rating, 2), + "avgDifficulty": round(avg_difficulty, 2), + "totalRatings": total_reviews, + "tags": tags, + "avgGpa": round(avg_gpa, 2) if avg_gpa else None, + "percentAB": round(float(row.percent_ab), 1) + if row.percent_ab + else None, + "gpaStudentCount": gpa_student_count, + }, + } + ) + + courses.sort(key=lambda x: x["easinessScore"], reverse=True) + courses = courses[:100] + + return [UccCourseDiscovery(**c) for c in courses] + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") diff --git a/src/aggiermp/api/routers/users.py b/src/aggiermp/api/routers/users.py index da6fe81..4a774d3 100644 --- a/src/aggiermp/api/routers/users.py +++ b/src/aggiermp/api/routers/users.py @@ -1,4 +1,4 @@ -ο»Ώfrom typing import List, Optional, Union +from typing import List, Optional, Union from fastapi import APIRouter, Depends, HTTPException from sqlalchemy import text from sqlalchemy.orm import Session @@ -21,6 +21,8 @@ class PushSubscriptionRequest(BaseModel): endpoint: str p256dh: str auth: str + device_name: Optional[str] = None + user_agent: Optional[str] = None model_config = ConfigDict(populate_by_name=True) @@ -37,6 +39,16 @@ class CreateTrackingRequest(BaseModel): class PushSubscriptionDeleteRequest(BaseModel): endpoint: str + +class PushSubscriptionDevice(BaseModel): + id: str + endpoint: str + device_name: Optional[str] = None + user_agent: Optional[str] = None + created_at: Optional[str] = None + last_seen_at: Optional[str] = None + + @router.post("/push-subscription") async def save_push_subscription( request: PushSubscriptionRequest, @@ -62,26 +74,38 @@ async def save_push_subscription( # Update existing subscription's keys (maybe they rotated) update_query = text(""" UPDATE user_subscriptions - SET p256dh = :p256dh, auth = :auth + SET p256dh = :p256dh, + auth = :auth, + device_name = :device_name, + user_agent = :user_agent, + last_seen_at = NOW() WHERE id = :id """) db.execute(update_query, { "id": existing.id, "p256dh": request.p256dh, - "auth": request.auth + "auth": request.auth, + "device_name": request.device_name, + "user_agent": request.user_agent, }) else: # Insert new subscription insert_query = text(""" - INSERT INTO user_subscriptions (id, user_id, endpoint, p256dh, auth) - VALUES (:id, :user_id, :endpoint, :p256dh, :auth) + INSERT INTO user_subscriptions ( + id, user_id, endpoint, p256dh, auth, device_name, user_agent, last_seen_at + ) + VALUES ( + :id, :user_id, :endpoint, :p256dh, :auth, :device_name, :user_agent, NOW() + ) """) db.execute(insert_query, { "id": str(uuid.uuid4()), "user_id": user_id, "endpoint": request.endpoint, "p256dh": request.p256dh, - "auth": request.auth + "auth": request.auth, + "device_name": request.device_name, + "user_agent": request.user_agent, }) db.commit() @@ -132,6 +156,42 @@ async def delete_push_subscription( ) +@router.get("/push-subscriptions", response_model=List[PushSubscriptionDevice]) +async def list_push_subscriptions( + session: SessionContainer = Depends(verify_session()), + db: Session = Depends(get_session), +): + """List current user's registered push subscription endpoints.""" + user_id = session.get_user_id() + + try: + query = text( + """ + SELECT id, endpoint, device_name, user_agent, created_at, last_seen_at + FROM user_subscriptions + WHERE user_id = :user_id + ORDER BY COALESCE(last_seen_at, created_at) DESC + """ + ) + result = db.execute(query, {"user_id": user_id}) + rows = result.fetchall() + + return [ + PushSubscriptionDevice( + id=str(row.id), + endpoint=row.endpoint, + device_name=row.device_name, + user_agent=row.user_agent, + created_at=row.created_at.isoformat() if row.created_at else None, + last_seen_at=row.last_seen_at.isoformat() if row.last_seen_at else None, + ) + for row in rows + ] + + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") + + @router.post("/schedules", response_model=UserSchedule) async def create_schedule( request: CreateScheduleRequest, diff --git a/src/aggiermp/collectors/section_watcher.py b/src/aggiermp/collectors/section_watcher.py new file mode 100644 index 0000000..99f3cde --- /dev/null +++ b/src/aggiermp/collectors/section_watcher.py @@ -0,0 +1,361 @@ +""" +section_watcher.py +================== +Optimised script that checks all actively-watched sections and sends a +web-push notification to each user whose watched section just opened. + +Design goals +------------ +* Single DB round-trip to get every watched section + its current open + status in one query (no N+1 queries). +* Parallel push delivery via a thread pool so network latency of one + subscription never blocks others. +* "Already notified" guard: a `last_notified_open_at` column on + `user_tracked_sections` prevents repeated spam when a section stays + open across multiple runs. + +Usage +----- + # Run once (e.g. from a cron job / scheduler): + python -m aggiermp.collectors.section_watcher + + # Or import and call programmatically: + from aggiermp.collectors.section_watcher import run_watcher + run_watcher() +""" + +from __future__ import annotations + +import json +import logging +import os +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +from typing import Any, Dict, List + +from dotenv import load_dotenv +from pywebpush import WebPushException, webpush +from sqlalchemy import text + +load_dotenv() + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) +logger = logging.getLogger("section_watcher") + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- +VAPID_PRIVATE_KEY: str = os.getenv("VAPID_PRIVATE_KEY", "") +VAPID_CONTACT: str = os.getenv("VAPID_CONTACT_EMAIL", "mailto:support@AggieSBP.com") +MAX_PUSH_WORKERS: int = 20 # concurrent push threads + + +# --------------------------------------------------------------------------- +# DB helpers (lazy import to avoid circular deps at import time) +# --------------------------------------------------------------------------- +def _get_session(): # type: ignore[return] + from aggiermp.database.base import get_session # noqa: PLC0415 + return get_session() + + +# --------------------------------------------------------------------------- +# Ensure the last_notified_open_at column exists +# --------------------------------------------------------------------------- +_CHECK_COLUMN_SQL = text(""" + SELECT 1 + FROM information_schema.columns + WHERE table_name = 'user_tracked_sections' + AND column_name = 'last_notified_open_at' +""") + + +def _ensure_migration(db: Any) -> None: + """Add last_notified_open_at column if it doesn't already exist.""" + try: + row = db.execute(_CHECK_COLUMN_SQL).fetchone() + if row: + logger.debug("Column last_notified_open_at already exists, skipping migration.") + return + + logger.info("Adding last_notified_open_at column to user_tracked_sections...") + # Use a short lock timeout so we fail fast instead of hanging forever + db.execute(text("SET lock_timeout = '5s'")) + db.execute(text( + "ALTER TABLE user_tracked_sections " + "ADD COLUMN IF NOT EXISTS last_notified_open_at TIMESTAMPTZ" + )) + db.commit() + logger.info("Migration complete.") + except Exception as exc: + logger.warning("Migration skipped: %s", exc) + db.rollback() + + +# --------------------------------------------------------------------------- +# Core query – one round-trip for everything we need +# --------------------------------------------------------------------------- +_WATCH_QUERY = text(""" + SELECT + uts.id AS track_id, + uts.user_id, + uts.section_id, + uts.last_notified_open_at, + s.is_open, + s.dept, + s.course_number, + s.section_number, + s.course_title, + s.crn, + s.term_code + FROM user_tracked_sections uts + JOIN sections s ON ( + -- section_id format: TERMCODE-CRN-DEPT-COURSENUM-SECNUM + -- sections.id format: TERMCODE_CRN + s.id = split_part(uts.section_id, '-', 1) || '_' || split_part(uts.section_id, '-', 2) + OR s.id = uts.section_id + OR s.id = uts.term_code || '_' || uts.section_id + ) + WHERE uts.status = 'active' +""") + + +# --------------------------------------------------------------------------- +# Push notification helper (runs in thread pool) +# --------------------------------------------------------------------------- +def _send_push(sub_row: Any, payload: str) -> tuple[str, bool]: + """Send one push notification. Returns (endpoint, success).""" + subscription_info = { + "endpoint": sub_row.endpoint, + "keys": {"p256dh": sub_row.p256dh, "auth": sub_row.auth}, + } + try: + webpush( + subscription_info=subscription_info, + data=payload, + vapid_private_key=VAPID_PRIVATE_KEY, + vapid_claims={"sub": VAPID_CONTACT}, + ) + return sub_row.endpoint, True + except WebPushException as exc: + status = exc.response.status_code if exc.response else None + logger.warning("Push failed (status=%s): %s", status, exc) + return sub_row.endpoint, False + except Exception as exc: + logger.error("Push error: %s", exc) + return sub_row.endpoint, False + + +def _build_payload(row: Any) -> str: + return json.dumps( + { + "title": "Seat Available! πŸŽ‰", + "body": ( + f"{row.dept} {row.course_number}.{row.section_number} " + f"({row.course_title}) just opened up!" + ), + "url": f"/sections/{row.term_code}", + "crn": row.crn, + "sectionId": row.section_id, + } + ) + + +# --------------------------------------------------------------------------- +# Main watcher logic +# --------------------------------------------------------------------------- +def run_watcher() -> Dict[str, Any]: + """ + Check all watched sections and push notifications for newly-opened ones. + + Returns a summary dict: + { + "checked": int, # total active watches + "newly_open": int, # sections that just became open + "notified": int, # push attempts that succeeded + "elapsed_s": float, + } + """ + t0 = time.perf_counter() + + if not VAPID_PRIVATE_KEY: + logger.error("VAPID_PRIVATE_KEY not set – cannot send push notifications.") + return {"error": "VAPID_PRIVATE_KEY not configured"} + + db = _get_session() + try: + _ensure_migration(db) + logger.info("Querying active watches...") + + # ── DIAGNOSTIC: dump raw tracked rows + verify JOIN ───────────── + raw_tracked = db.execute(text( + "SELECT id, user_id, section_id, term_code, status FROM user_tracked_sections WHERE status = 'active'" + )).fetchall() + logger.info("RAW tracked rows (%d): %s", len(raw_tracked), + [(r.section_id, r.status) for r in raw_tracked]) + + if raw_tracked: + sid_sample = raw_tracked[0].section_id + match = db.execute(text("SELECT id, is_open FROM sections WHERE id = :sid"), + {"sid": sid_sample}).fetchone() + logger.info("Sections lookup for section_id=%r β†’ %s", sid_sample, match) + + # ── 1. Fetch all active watches with section status ────────────── + rows = db.execute(_WATCH_QUERY).fetchall() + logger.info("Fetched %d active watch row(s).", len(rows)) + + # ── 2. Filter: section is open AND we haven't notified for this + # open event yet (last_notified_open_at is null OR + # the section was closed since the last notification, + # meaning it became open *again* β€” we detect that by + # checking is_open vs. last_notified state via the + # timestamp: if is_open is True and we last notified + # before is_open became True we'd need a + # `became_open_at` column; simplest safe approach + # is: notify if is_open AND last_notified_open_at IS + # NULL, then set it; clear it from a separate + # poll when is_open becomes False.) + # ──────────────────────────────────────────────────────────────── + needs_notify: List[Any] = [ + r for r in rows + if r.is_open and r.last_notified_open_at is None + ] + + # ── 3. Clear stale notifications for sections that closed again ── + closed_track_ids = [ + r.track_id for r in rows + if not r.is_open and r.last_notified_open_at is not None + ] + if closed_track_ids: + db.execute( + text( + "UPDATE user_tracked_sections " + "SET last_notified_open_at = NULL " + "WHERE id = ANY(:ids)" + ), + {"ids": closed_track_ids}, + ) + db.commit() + logger.info( + "Reset last_notified_open_at for %d closed section(s).", + len(closed_track_ids), + ) + + if not needs_notify: + elapsed = time.perf_counter() - t0 + logger.info("No new openings found. Completed in %.2fs.", elapsed) + return { + "checked": len(rows), + "newly_open": 0, + "notified": 0, + "elapsed_s": round(elapsed, 3), + } + + logger.info("%d section(s) newly opened, fetching subscriptions...", len(needs_notify)) + + # ── 4. Collect unique user_ids and fetch all their subscriptions + # in ONE query ─────────────────────────────────────────── + user_ids = list({r.user_id for r in needs_notify}) + sub_rows = db.execute( + text( + "SELECT user_id, endpoint, p256dh, auth " + "FROM user_subscriptions " + "WHERE user_id = ANY(:user_ids)" + ), + {"user_ids": user_ids}, + ).fetchall() + + # Index subscriptions by user_id for fast lookup + subs_by_user: Dict[str, List[Any]] = {} + for sub in sub_rows: + subs_by_user.setdefault(sub.user_id, []).append(sub) + + logger.info( + "Found %d subscription(s) across %d user(s).", + len(sub_rows), + len(user_ids), + ) + + # ── 5. Dispatch push notifications in parallel ─────────────────── + push_tasks: List[tuple[Any, str]] = [] # (sub_row, payload) + for row in needs_notify: + payload = _build_payload(row) + for sub in subs_by_user.get(row.user_id, []): + push_tasks.append((sub, payload)) + + notified_count = 0 + stale_endpoints: List[str] = [] + + with ThreadPoolExecutor(max_workers=min(MAX_PUSH_WORKERS, len(push_tasks) or 1)) as pool: + future_map = { + pool.submit(_send_push, sub, payload): sub + for sub, payload in push_tasks + } + for future in as_completed(future_map): + endpoint, ok = future.result() + if ok: + notified_count += 1 + else: + stale_endpoints.append(endpoint) + + # ── 6. Remove stale subscriptions (expired/invalid) ───────────── + if stale_endpoints: + db.execute( + text( + "DELETE FROM user_subscriptions WHERE endpoint = ANY(:eps)" + ), + {"eps": stale_endpoints}, + ) + db.commit() + logger.info("Removed %d stale subscription(s).", len(stale_endpoints)) + + # ── 7. Stamp successfully-notified watches ─────────────────────── + now = datetime.utcnow() + notified_track_ids = [r.track_id for r in needs_notify] + db.execute( + text( + "UPDATE user_tracked_sections " + "SET last_notified_open_at = :now " + "WHERE id = ANY(:ids)" + ), + {"now": now, "ids": notified_track_ids}, + ) + db.commit() + + elapsed = time.perf_counter() - t0 + logger.info( + "Done. checked=%d newly_open=%d notified=%d elapsed=%.2fs", + len(rows), + len(needs_notify), + notified_count, + elapsed, + ) + return { + "checked": len(rows), + "newly_open": len(needs_notify), + "notified": notified_count, + "elapsed_s": round(elapsed, 3), + } + + except Exception as exc: + logger.exception("Watcher failed: %s", exc) + db.rollback() + return {"error": str(exc)} + finally: + db.close() + + +# --------------------------------------------------------------------------- +# Entrypoint +# --------------------------------------------------------------------------- +if __name__ == "__main__": + summary = run_watcher() + print("\n=== Section Watcher Summary ===") + for k, v in summary.items(): + print(f" {k:12s}: {v}") diff --git a/src/aggiermp/core/cache.py b/src/aggiermp/core/cache.py index 82c6ea4..3030b4e 100644 --- a/src/aggiermp/core/cache.py +++ b/src/aggiermp/core/cache.py @@ -17,8 +17,7 @@ from fastapi import Request from pydantic import BaseModel -# Redis connection settings -REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379") +from ..core.config import settings # Default TTLs (in seconds) TTL_SHORT = 300 # 5 minutes - for stats @@ -38,8 +37,11 @@ async def get_redis() -> Optional[redis.Redis]: global _redis_client if _redis_client is None: try: + # Use configured redis_url or fallback + redis_url = settings.redis_url or "redis://localhost:6379" + _redis_client = redis.from_url( - REDIS_URL, + redis_url, encoding="utf-8", decode_responses=True, ) diff --git a/src/aggiermp/core/notifications.py b/src/aggiermp/core/notifications.py index c30b831..773207d 100644 --- a/src/aggiermp/core/notifications.py +++ b/src/aggiermp/core/notifications.py @@ -1,4 +1,4 @@ -ο»Ώimport json +import json import logging from typing import Dict, Any, List from pywebpush import webpush, WebPushException diff --git a/src/aggiermp/database/base.py b/src/aggiermp/database/base.py index 288803a..56b11c5 100644 --- a/src/aggiermp/database/base.py +++ b/src/aggiermp/database/base.py @@ -470,21 +470,25 @@ def __repr__(self) -> str: class UserSubscriptionDB(Base): - """Web push subscriptions for authenticated users.""" + """Database model for Web Push Subscriptions""" __tablename__ = "user_subscriptions" - id = Column(String, primary_key=True) + id = Column(String, primary_key=True) # UUID user_id = Column(String, nullable=False, index=True) endpoint = Column(Text, nullable=False) p256dh = Column(String, nullable=False) auth = Column(String, nullable=False) + device_name = Column(String, nullable=True) + user_agent = Column(Text, nullable=True) + last_seen_at = Column(DateTime, nullable=False, default=datetime.now) created_at = Column(DateTime, nullable=False, default=datetime.now) def __repr__(self) -> str: return f"" + # Global engine instance for connection pooling _engine = None _session_factory = None diff --git a/src/aggiermp/models/schema.py b/src/aggiermp/models/schema.py index 7d8bf84..4f30d6e 100644 --- a/src/aggiermp/models/schema.py +++ b/src/aggiermp/models/schema.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import Optional, List, Dict, Any, Union -from uuid import UUID from pydantic import BaseModel, Field, field_validator, ConfigDict +from uuid import UUID from enum import Enum @@ -201,21 +201,21 @@ class Summary(BaseModel): class UserSchedule(BaseModel): - """User saved schedule (API response).""" - + """User Saved Schedule Model""" + id: Optional[UUID] = None user_id: str name: str term_code: str - courses: List[Union[str, int]] = [] + courses: List[Union[str, int]] = [] # List of CRNs or Course IDs created_at: Optional[datetime] = None model_config = ConfigDict(populate_by_name=True) class UserTrackedSection(BaseModel): - """User tracked section for seat alerts (API response).""" - + """User Tracked Section Model""" + id: Optional[UUID] = None user_id: str section_id: str @@ -227,7 +227,7 @@ class UserTrackedSection(BaseModel): class UserSubscription(BaseModel): - """Web push subscription (API response).""" + """User Subscription Model""" id: Optional[UUID] = None user_id: str @@ -266,5 +266,4 @@ class UserSubscription(BaseModel): "summaries": Summary, "user_schedules": UserSchedule, "user_tracked_sections": UserTrackedSection, - "user_subscriptions": UserSubscription, } diff --git a/tests/test_api_endpoints.py b/tests/test_api_endpoints.py index aa22587..3e536be 100644 --- a/tests/test_api_endpoints.py +++ b/tests/test_api_endpoints.py @@ -89,3 +89,29 @@ def test_ucc_discovery_endpoint(client: TestClient) -> None: category_group = data[0] assert "category" in category_group assert "courses" in category_group + + +def test_dept_discovery_endpoint(client: TestClient) -> None: + """Test the department discovery endpoint.""" + term_code = "202611" + dept_code = "CSCE" + + # Verify the term exists or fallback + terms_response = client.get("/terms") + if terms_response.status_code == 200 and len(terms_response.json()) > 0: + term_code = terms_response.json()[0]["termCode"] + + response = client.get(f"/discover/{term_code}/{dept_code}") + + # Expect 200 if the query is valid. + assert response.status_code == 200 + + data = response.json() + assert isinstance(data, list) + if len(data) > 0: + course = data[0] + assert "dept" in course + assert "courseNumber" in course + assert "easinessScore" in course + assert "professor" in course + assert "firstName" in course["professor"]