nitheesh-cpu · nitheesh-cpu · Apr 14, 2026 · Mar 9, 2026 · Mar 10, 2026 · Mar 10, 2026
diff --git a/GEMINI.md b/GEMINI.md
@@ -0,0 +1,43 @@
+# GEMINI Agent Instructions
+
+## Terminal Blindness & Output Issues
+This environment (Windows CMD) has difficulty with standard output capturing and file redirection from `run_command`.
+
+### Symptoms
+- `run_command` returns no output even for simple commands.
+- File redirection (`> output.txt`) often fails to create the file.
+
+### Solutions
+1. **Force Unbuffered Output**: Always run Python scripts with `python -u` or set `PYTHONUNBUFFERED=1`.
+   ```bash
+   python -u script_name.py
+   ```
+2. **Avoid Shell Redirection**: Do not rely on `>` or `2>&1` in the `run_command` string. Instead, write to files *inside* the python script.
+3. **Explicit File IO**: If you need to see output, have the script write it to a file explicitly (e.g. `open('output.txt', 'w').write(...)`) and then use `view_file` to read that file.
+4. **Environment Variables**:
+   - Check if `PYTHONIOENCODING` is set to `utf-8`.
+
+## Persistent Fix
+To permanently fix this for Python:
+- Set `PYTHONUNBUFFERED=1` in `.env` (if the environment loads it); otherwise remember to always pass `-u`.
+
+## Terminal Blindness Status
+**Status:** FIXED (Workaround Active)
+**Diagnosis Update:** 
+- The Google Antigravity `run_command` wrapper mechanism is permanently swallowing stdout/execution hooks (`VSCODE_INJECTION` / wrapper artifacts).
+- Deleting registry `AutoRun` keys (`HKEY_CURRENT_USER\Software\Microsoft\Command Processor`) did NOT resolve the wrapper's blindness.
+- However, standard execution is perfectly intact inside an isolated, long-running background `cmd.exe` shell.
+
+## Fix / Workaround
+To execute commands without blindness:
+1. Initialize a single background `cmd.exe` terminal via `run_command` (e.g., CommandID: `26c75e13...`).
+2. Use `send_command_input` to explicitly send commands (like `python -u script.py\n`) to this background terminal.
+3. Observe the `command_status` output. This completely restores standard output viewing and avoids the fatal wrapper bugs!
+
+## Preferences
+- **Execution Policy:** ALWAYS use `send_command_input` to a dedicated background terminal instead of direct `run_command` wrappers for scripts generating output.
+
+## Additional Preferences
+- **Preferred Shell:** Bash (if available) to avoid CMD blindness.
+- **Terminal Check:** Always verify command execution via file side-effects (e.g., creating a status file).
+
diff --git a/pipelines/professors/hierarchical_summarization/clustering.py b/pipelines/professors/hierarchical_summarization/clustering.py
@@ -50,18 +50,24 @@ def cluster_reviews(
 
         # Group reviews by cluster
         clusters: Dict[int, List[ProcessedReview]] = {}
-        noise_count = 0
+        noise_reviews: List[ProcessedReview] = []
 
         for review, label in zip(reviews, cluster_labels):
             if label == -1:  # Noise
-                noise_count += 1
+                noise_reviews.append(review)
                 continue
 
             if label not in clusters:
                 clusters[label] = []
             clusters[label].append(review)
 
-        print(f"Found {len(clusters)} clusters, {noise_count} noise points")
+        print(f"Found {len(clusters)} clusters, {len(noise_reviews)} noise points")
+
+        # Fallback: if no clusters found, put all reviews (including noise) into cluster 0
+        # This ensures we still generate summaries for small review sets
+        if not clusters and noise_reviews:
+            clusters[0] = noise_reviews
+            print(f"  -> Fallback: using all {len(noise_reviews)} reviews as single cluster")
 
         return clusters
 

diff --git a/pipelines/professors/hierarchical_summarization/pipeline.py b/pipelines/professors/hierarchical_summarization/pipeline.py
@@ -119,8 +119,16 @@ def process_professor_reviews(
             processed_reviews, embeddings
         )
 
+        # Calculate actual review counts per course (before clustering drops noise)
+        review_counts_by_course: Dict[str, int] = {}
+        for review in processed_reviews:
+            course = review.course_code or "UNKNOWN"
+            review_counts_by_course[course] = review_counts_by_course.get(course, 0) + 1
+
         # Step 4: Generate course summaries
-        course_summaries = self._generate_course_summaries(course_clusters)
+        course_summaries = self._generate_course_summaries(
+            course_clusters, review_counts_by_course
+        )
 
         # Step 5: Generate professor summary
         professor_summary = self._generate_professor_summary(
@@ -133,7 +141,9 @@ def process_professor_reviews(
         return professor_summary
 
     def _generate_course_summaries(
-        self, course_clusters: Dict[str, Dict[int, List[ProcessedReview]]]
+        self,
+        course_clusters: Dict[str, Dict[int, List[ProcessedReview]]],
+        review_counts_by_course: Dict[str, int],
     ) -> List[CourseSummary]:
         """Generate structured summaries for each course"""
         course_summaries = []
@@ -150,10 +160,13 @@ def _generate_course_summaries(
                 clusters, cluster_types
             )
 
+            # Use actual review count for this course (not just clustered reviews)
+            actual_review_count = review_counts_by_course.get(course_code, 0)
+
             # Build structured course summary
             course_summary = CourseSummary(
                 course=course_code,
-                total_reviews=sum(len(reviews) for reviews in clusters.values()),
+                total_reviews=actual_review_count,
             )
 
             # Organize summaries by type

diff --git a/requirements.txt b/requirements.txt
@@ -36,7 +36,9 @@ psycopg2==2.9.11
 pydantic==2.12.5
 pydantic-core==2.41.5
 python-dotenv==1.2.1
+pydantic-settings
 pyyaml==6.0.3
+pywebpush
 regex==2025.11.3
 requests==2.32.5
 safetensors==0.7.0
@@ -48,6 +50,7 @@ setuptools==80.9.0
 slowapi==0.1.9
 soupsieve==2.8.1
 sqlalchemy==2.0.45
+supertokens_python
 starlette==0.50.0
 sympy==1.14.0
 threadpoolctl==3.6.0
@@ -61,3 +64,5 @@ typing-inspection==0.4.2
 urllib3==2.6.2
 uvicorn==0.40.0
 yarl==1.22.0
+novu==1.13.0
+redis==5.0.0