Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ SQLite Database ←→ GitHub API

### GitHub Service (`app/services/github_service.py`)

- Search repositories by topic via GitHub API
- Search repositories by topic via GitHub API (default: `hadiscover` or `ha-discover`)
- Optional no-topic search mode for testing/development (`ENABLE_NO_TOPIC_SEARCH`)
- Configurable repository limit for testing (`MAX_REPOSITORIES`)
- Fetch file contents from repos
- Discover automation files using path patterns
- Optional GitHub token for higher rate limits (5k/hr vs 60/hr)
Expand Down Expand Up @@ -140,6 +142,8 @@ Fault-tolerant YAML parsing handles varied Home Assistant configurations. Partia

Only indexes repositories with an explicit `hadiscover` (or legacy `ha-discover`) topic. Respects privacy and user consent.

**Testing Mode**: For development and testing, can be configured to search without topic requirement using `ENABLE_NO_TOPIC_SEARCH=true` environment variable. A `MAX_REPOSITORIES` limit can also be set to control result size.

### Hourly Scheduled Indexing

APScheduler runs automatic indexing every hour. Manual trigger available in development mode only.
Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,18 @@ cd backend && source venv/bin/activate && pytest tests/ -v

CI automatically tests Docker containers, API endpoints, and integration on every PR.

#### Testing Configuration (Optional)

For testing and development purposes, you can enable no-topic search to find more repositories:

```bash
# In backend/.env or as environment variables
ENABLE_NO_TOPIC_SEARCH=true # Search without requiring hadiscover topic
MAX_REPOSITORIES=10 # Limit results for testing (optional)
```

**Warning**: Keep `ENABLE_NO_TOPIC_SEARCH=false` in production to maintain the opt-in privacy model.

### API Documentation

OpenAPI/Swagger docs available at <http://localhost:8000/docs> once running.
Expand Down
12 changes: 12 additions & 0 deletions backend/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,15 @@ ENVIRONMENT=production
# Example: ROOT_PATH=/api/v1 if your cloud platform handles routing
# and you want the app accessible at the root URL
ROOT_PATH=

# No-Topic Search Configuration (optional, for testing)
# Set to "true" to search for automation files without requiring the "hadiscover" topic
# This is useful for testing scenarios to find more diverse repositories
# WARNING: Keep this "false" in production to maintain opt-in privacy
ENABLE_NO_TOPIC_SEARCH=false

# Maximum Repositories Configuration (optional, for testing)
# Set a maximum number of repositories to index (e.g., 10 for testing)
# Leave empty or unset for no limit
# This is useful for testing to avoid performance issues with large result sets
MAX_REPOSITORIES=
12 changes: 11 additions & 1 deletion backend/app/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,17 @@ async def trigger_indexing(

async def run_indexing():
"""Background task to run indexing."""
indexer = IndexingService()
# Get no-topic search configuration
enable_no_topic_search = os.getenv(
"ENABLE_NO_TOPIC_SEARCH", "false"
).lower() in ("true", "1", "yes")
max_repositories_str = os.getenv("MAX_REPOSITORIES")
max_repositories = int(max_repositories_str) if max_repositories_str else None

indexer = IndexingService(
enable_no_topic_search=enable_no_topic_search,
max_repositories=max_repositories,
)
# Create a new session for background task
from app.models import SessionLocal

Expand Down
22 changes: 21 additions & 1 deletion backend/app/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,28 @@ async def run_indexing():
if not github_token:
logger.warning("GITHUB_TOKEN not set - API rate limits will be lower")

# Get no-topic search configuration
enable_no_topic_search = os.getenv("ENABLE_NO_TOPIC_SEARCH", "false").lower() in (
"true",
"1",
"yes",
)
max_repositories_str = os.getenv("MAX_REPOSITORIES")
max_repositories = int(max_repositories_str) if max_repositories_str else None

if enable_no_topic_search:
logger.info("No-topic search enabled")
if max_repositories:
logger.info(f"Maximum repositories limit: {max_repositories}")
else:
logger.info("Topic-based search enabled (default)")

# Create indexing service
indexer = IndexingService(github_token=github_token)
indexer = IndexingService(
github_token=github_token,
enable_no_topic_search=enable_no_topic_search,
max_repositories=max_repositories,
)

# Get database session (this also initializes the database)
db = get_db_session()
Expand Down
115 changes: 104 additions & 11 deletions backend/app/services/github_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,23 @@ class GitHubService:
"ha-discover",
] # Support both topics for backwards compatibility

def __init__(self, token: Optional[str] = None):
"""Initialize GitHub service with optional authentication token."""
def __init__(
self,
token: Optional[str] = None,
enable_no_topic_search: bool = False,
max_repositories: Optional[int] = None,
):
"""
Initialize GitHub service with optional authentication token.

Args:
token: GitHub personal access token
enable_no_topic_search: If True, search for automation files without topic requirement
max_repositories: Maximum number of repositories to return (None = no limit)
"""
self.token = token or os.getenv("GITHUB_TOKEN")
self.enable_no_topic_search = enable_no_topic_search
self.max_repositories = max_repositories
self.headers = {
"Accept": "application/vnd.github.v3+json",
}
Expand Down Expand Up @@ -68,6 +82,7 @@ def _check_rate_limit(self, response: httpx.Response, operation: str) -> None:
async def search_repositories(self, per_page: int = 100) -> List[Dict]:
"""
Search for repositories with the hadiscover or ha-discover topics.
If enable_no_topic_search is True, search for automation files without topic requirement.

Args:
per_page: Number of results per page (max 100)
Expand All @@ -79,14 +94,16 @@ async def search_repositories(self, per_page: int = 100) -> List[Dict]:
seen_repos = set() # Track repos to avoid duplicates

async with httpx.AsyncClient() as client:
# Search for each topic
for topic in self.SEARCH_TOPICS:
if self.enable_no_topic_search:
# Search for repositories with automation files (no topic requirement)
# Use a broad search for Home Assistant automation files
page = 1
reached_limit = False
while True:
try:
url = f"{self.BASE_URL}/search/repositories"
params = {
"q": f"topic:{topic}",
"q": "automations.yaml in:path",
"per_page": per_page,
"page": page,
}
Expand Down Expand Up @@ -126,21 +143,97 @@ async def search_repositories(self, per_page: int = 100) -> List[Dict]:
}
)

# Check if there are more pages
if len(items) < per_page:
# Check if we've reached the maximum number of repositories
if (
self.max_repositories is not None
and len(all_repositories) >= self.max_repositories
):
logger.info(
f"Reached max repository limit: {self.max_repositories}"
)
reached_limit = True
break

# Stop pagination if we've reached the limit or there are no more pages
if reached_limit or len(items) < per_page:
break

page += 1

except httpx.HTTPError as e:
logger.error(
f"Error searching repositories with topic '{topic}': {e}"
f"Error searching repositories with automation files: {e}"
)
break

logger.info(
f"Found {len(all_repositories)} repositories with topics {self.SEARCH_TOPICS}"
)
logger.info(
f"Found {len(all_repositories)} repositories with automation files (no topic search)"
)
else:
# Original topic-based search
# Search for each topic
for topic in self.SEARCH_TOPICS:
page = 1
while True:
try:
url = f"{self.BASE_URL}/search/repositories"
params = {
"q": f"topic:{topic}",
"per_page": per_page,
"page": page,
}

response = await client.get(
url, headers=self.headers, params=params, timeout=30.0
)

# Check for rate limiting (status 429 or 403 with rate limit message)
self._check_rate_limit(response, "search_repositories")

response.raise_for_status()

data = response.json()
items = data.get("items", [])

if not items:
break

for repo in items:
repo_key = f"{repo['owner']['login']}/{repo['name']}"
# Skip if we've already seen this repo
if repo_key in seen_repos:
continue

seen_repos.add(repo_key)
all_repositories.append(
{
"name": repo["name"],
"owner": repo["owner"]["login"],
"description": repo.get("description", ""),
"url": repo["html_url"],
"default_branch": repo.get(
"default_branch", "main"
),
"stars": repo.get("stargazers_count", 0),
}
)

# Check if there are more pages
if len(items) < per_page:
break

page += 1

except httpx.HTTPError as e:
logger.error(
f"Error searching repositories with topic '{topic}': {e}"
)
break

logger.info(
f"Found {len(all_repositories)} repositories with topics {self.SEARCH_TOPICS}"
)

return all_repositories

async def get_file_content(
Expand Down
22 changes: 19 additions & 3 deletions backend/app/services/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,25 @@
class IndexingService:
"""Service for indexing Home Assistant automations from GitHub repositories."""

def __init__(self, github_token: Optional[str] = None):
"""Initialize indexing service with GitHub API access."""
self.github_service = GitHubService(token=github_token)
def __init__(
    self,
    github_token: Optional[str] = None,
    enable_no_topic_search: bool = False,
    max_repositories: Optional[int] = None,
):
    """
    Set up the collaborators needed for an indexing run.

    Args:
        github_token: GitHub personal access token; when omitted the
            GitHub service falls back to the GITHUB_TOKEN environment
            variable.
        enable_no_topic_search: If True, repositories are discovered by
            searching for automation files instead of requiring the
            opt-in topic.
        max_repositories: Maximum number of repositories to index
            (None = no limit).
    """
    # The parser has no configuration; the GitHub client carries all of it.
    self.parser = AutomationParser()
    self.github_service = GitHubService(
        token=github_token,
        enable_no_topic_search=enable_no_topic_search,
        max_repositories=max_repositories,
    )

async def index_repositories(self, db: Session) -> dict:
Expand Down
13 changes: 12 additions & 1 deletion backend/app/services/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,20 @@ async def run_indexing_task(self):
if not github_token:
logger.warning("GITHUB_TOKEN not set - API rate limits will be lower")

# Get no-topic search configuration
enable_no_topic_search = os.getenv(
"ENABLE_NO_TOPIC_SEARCH", "false"
).lower() in ("true", "1", "yes")
max_repositories_str = os.getenv("MAX_REPOSITORIES")
max_repositories = int(max_repositories_str) if max_repositories_str else None

# Create indexing service if not already created
if not self.indexer:
self.indexer = IndexingService(github_token=github_token)
self.indexer = IndexingService(
github_token=github_token,
enable_no_topic_search=enable_no_topic_search,
max_repositories=max_repositories,
)

# Get database session
db = self._get_db()
Expand Down
Loading
Loading