From 8585ea9e54259e1b6a5474f78a42fb525861633e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 17 Jan 2026 07:50:10 +0000
Subject: [PATCH 1/4] Initial plan


From 772d9d6f6a369fbf3895c524107731ef3b25b9d3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 17 Jan 2026 07:55:25 +0000
Subject: [PATCH 2/4] Add no-topic search feature with environment variables
 and tests

Co-authored-by: DevSecNinja <14926452+DevSecNinja@users.noreply.github.com>
---
 backend/.env.example                   |  12 +
 backend/app/api/routes.py              |  12 +-
 backend/app/cli.py                     |  22 +-
 backend/app/services/github_service.py | 116 +++++++++-
 backend/app/services/indexer.py        |  22 +-
 backend/app/services/scheduler.py      |  13 +-
 backend/tests/test_no_topic_search.py  | 291 +++++++++++++++++++++++++
 docker-compose.yml                     |   4 +
 8 files changed, 475 insertions(+), 17 deletions(-)
 create mode 100644 backend/tests/test_no_topic_search.py

diff --git a/backend/.env.example b/backend/.env.example
index f8bf7d2..a2b871a 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -20,3 +20,15 @@ ENVIRONMENT=production
 # Example: ROOT_PATH=/api/v1 if your cloud platform handles routing
 # and you want the app accessible at the root URL
 ROOT_PATH=
+
+# No-Topic Search Configuration (optional, for testing)
+# Set to "true" to search for automation files without requiring the "hadiscover" topic
+# This is useful for testing scenarios to find more diverse repositories
+# WARNING: Keep this "false" in production to maintain opt-in privacy
+ENABLE_NO_TOPIC_SEARCH=false
+
+# Maximum Repositories Configuration (optional, for testing)
+# Set a maximum number of repositories to index (e.g., 10 for testing)
+# Leave empty or unset for no limit; any other value must be an integer
+# Only takes effect when ENABLE_NO_TOPIC_SEARCH is enabled; useful there to avoid performance issues with large result sets
+MAX_REPOSITORIES=
diff --git a/backend/app/api/routes.py b/backend/app/api/routes.py
index 28713a0..9a4e0ae 100644
--- a/backend/app/api/routes.py
+++ b/backend/app/api/routes.py
@@ -273,7 +273,17 @@ async def trigger_indexing(
 
     async def run_indexing():
         """Background task to run indexing."""
-        indexer = IndexingService()
+        # Get no-topic search configuration
+        enable_no_topic_search = os.getenv(
+            "ENABLE_NO_TOPIC_SEARCH", "false"
+        ).lower() in ("true", "1", "yes")
+        max_repositories_str = os.getenv("MAX_REPOSITORIES")
+        max_repositories = int(max_repositories_str) if max_repositories_str else None
+
+        indexer = IndexingService(
+            enable_no_topic_search=enable_no_topic_search,
+            max_repositories=max_repositories,
+        )
         # Create a new session for background task
         from app.models import SessionLocal
 
diff --git a/backend/app/cli.py b/backend/app/cli.py
index 45dcc4a..4d7973f 100644
--- a/backend/app/cli.py
+++ b/backend/app/cli.py
@@ -41,8 +41,28 @@ async def run_indexing():
     if not github_token:
         logger.warning("GITHUB_TOKEN not set - API rate limits will be lower")
 
+    # Get no-topic search configuration
+    enable_no_topic_search = os.getenv("ENABLE_NO_TOPIC_SEARCH", "false").lower() in (
+        "true",
+        "1",
+        "yes",
+    )
+    max_repositories_str = os.getenv("MAX_REPOSITORIES")
+    max_repositories = int(max_repositories_str) if max_repositories_str else None
+
+    if enable_no_topic_search:
+        logger.info("No-topic search enabled")
+        if max_repositories:
+            logger.info(f"Maximum repositories limit: {max_repositories}")
+    else:
+        logger.info("Topic-based search enabled (default)")
+
     # Create indexing service
-    indexer = IndexingService(github_token=github_token)
+    indexer = IndexingService(
+        github_token=github_token,
+        enable_no_topic_search=enable_no_topic_search,
+        max_repositories=max_repositories,
+    )
 
     # Get database session (this also initializes the database)
     db = get_db_session()
diff --git a/backend/app/services/github_service.py b/backend/app/services/github_service.py
index 361a013..43479d6 100644
--- a/backend/app/services/github_service.py
+++ b/backend/app/services/github_service.py
@@ -34,9 +34,23 @@ class GitHubService:
         "ha-discover",
     ]  # Support both topics for backwards compatibility
 
-    def __init__(self, token: Optional[str] = None):
-        """Initialize GitHub service with optional authentication token."""
+    def __init__(
+        self,
+        token: Optional[str] = None,
+        enable_no_topic_search: bool = False,
+        max_repositories: Optional[int] = None,
+    ):
+        """
+        Initialize GitHub service with optional authentication token.
+
+        Args:
+            token: GitHub personal access token
+            enable_no_topic_search: If True, search for automation files without topic requirement
+            max_repositories: Maximum number of repositories to return (None = no limit)
+        """
         self.token = token or os.getenv("GITHUB_TOKEN")
+        self.enable_no_topic_search = enable_no_topic_search
+        self.max_repositories = max_repositories
         self.headers = {
             "Accept": "application/vnd.github.v3+json",
         }
@@ -68,6 +82,7 @@ def _check_rate_limit(self, response: httpx.Response, operation: str) -> None:
     async def search_repositories(self, per_page: int = 100) -> List[Dict]:
         """
         Search for repositories with the hadiscover or ha-discover topics.
+        If enable_no_topic_search is True, search for automation files without topic requirement.
 
         Args:
             per_page: Number of results per page (max 100)
@@ -79,14 +94,15 @@ async def search_repositories(self, per_page: int = 100) -> List[Dict]:
         seen_repos = set()  # Track repos to avoid duplicates
 
         async with httpx.AsyncClient() as client:
-            # Search for each topic
-            for topic in self.SEARCH_TOPICS:
+            if self.enable_no_topic_search:
+                # Search for repositories with automation files (no topic requirement)
+                # Use a broad search for Home Assistant automation files
                 page = 1
                 while True:
                     try:
                         url = f"{self.BASE_URL}/search/repositories"
                         params = {
-                            "q": f"topic:{topic}",
+                            "q": "automations.yaml in:path",
                             "per_page": per_page,
                             "page": page,
                         }
@@ -126,21 +142,99 @@ async def search_repositories(self, per_page: int = 100) -> List[Dict]:
                                 }
                             )
 
-                        # Check if there are more pages
-                        if len(items) < per_page:
+                            # Check if we've reached the maximum number of repositories
+                            if (
+                                self.max_repositories is not None
+                                and len(all_repositories) >= self.max_repositories
+                            ):
+                                logger.info(
+                                    f"Reached max repository limit: {self.max_repositories}"
+                                )
+                                break
+
+                        # Check if we've reached the maximum or there are no more pages
+                        if (
+                            self.max_repositories is not None
+                            and len(all_repositories) >= self.max_repositories
+                        ) or len(items) < per_page:
                             break
 
                         page += 1
 
                     except httpx.HTTPError as e:
                         logger.error(
-                            f"Error searching repositories with topic '{topic}': {e}"
+                            f"Error searching repositories with automation files: {e}"
                         )
                         break
 
-        logger.info(
-            f"Found {len(all_repositories)} repositories with topics {self.SEARCH_TOPICS}"
-        )
+                logger.info(
+                    f"Found {len(all_repositories)} repositories with automation files (no topic search)"
+                )
+            else:
+                # Original topic-based search
+                # Search for each topic
+                for topic in self.SEARCH_TOPICS:
+                    page = 1
+                    while True:
+                        try:
+                            url = f"{self.BASE_URL}/search/repositories"
+                            params = {
+                                "q": f"topic:{topic}",
+                                "per_page": per_page,
+                                "page": page,
+                            }
+
+                            response = await client.get(
+                                url, headers=self.headers, params=params, timeout=30.0
+                            )
+
+                            # Check for rate limiting (status 429 or 403 with rate limit message)
+                            self._check_rate_limit(response, "search_repositories")
+
+                            response.raise_for_status()
+
+                            data = response.json()
+                            items = data.get("items", [])
+
+                            if not items:
+                                break
+
+                            for repo in items:
+                                repo_key = f"{repo['owner']['login']}/{repo['name']}"
+                                # Skip if we've already seen this repo
+                                if repo_key in seen_repos:
+                                    continue
+
+                                seen_repos.add(repo_key)
+                                all_repositories.append(
+                                    {
+                                        "name": repo["name"],
+                                        "owner": repo["owner"]["login"],
+                                        "description": repo.get("description", ""),
+                                        "url": repo["html_url"],
+                                        "default_branch": repo.get(
+                                            "default_branch", "main"
+                                        ),
+                                        "stars": repo.get("stargazers_count", 0),
+                                    }
+                                )
+
+                            # Check if there are more pages
+                            if len(items) < per_page:
+                                break
+
+                            page += 1
+
+                        except httpx.HTTPError as e:
+                            logger.error(
+                                f"Error searching repositories with topic '{topic}': {e}"
+                            )
+                            break
+
+                logger.info(
+                    f"Found {len(all_repositories)} repositories with topics {self.SEARCH_TOPICS}"
+                )
+
         return all_repositories
 
     async def get_file_content(
diff --git a/backend/app/services/indexer.py b/backend/app/services/indexer.py
index 49cec63..b9da22b 100644
--- a/backend/app/services/indexer.py
+++ b/backend/app/services/indexer.py
@@ -16,9 +16,25 @@
 class IndexingService:
     """Service for indexing Home Assistant automations from GitHub repositories."""
 
-    def __init__(self, github_token: Optional[str] = None):
-        """Initialize indexing service with GitHub API access."""
-        self.github_service = GitHubService(token=github_token)
+    def __init__(
+        self,
+        github_token: Optional[str] = None,
+        enable_no_topic_search: bool = False,
+        max_repositories: Optional[int] = None,
+    ):
+        """
+        Initialize indexing service with GitHub API access.
+
+        Args:
+            github_token: GitHub personal access token
+            enable_no_topic_search: If True, search for automation files without topic requirement
+            max_repositories: Maximum number of repositories to index (None = no limit)
+        """
+        self.github_service = GitHubService(
+            token=github_token,
+            enable_no_topic_search=enable_no_topic_search,
+            max_repositories=max_repositories,
+        )
         self.parser = AutomationParser()
 
     async def index_repositories(self, db: Session) -> dict:
diff --git a/backend/app/services/scheduler.py b/backend/app/services/scheduler.py
index da4d94c..3e12da1 100644
--- a/backend/app/services/scheduler.py
+++ b/backend/app/services/scheduler.py
@@ -46,9 +46,20 @@ async def run_indexing_task(self):
         if not github_token:
             logger.warning("GITHUB_TOKEN not set - API rate limits will be lower")
 
+        # Get no-topic search configuration
+        enable_no_topic_search = os.getenv(
+            "ENABLE_NO_TOPIC_SEARCH", "false"
+        ).lower() in ("true", "1", "yes")
+        max_repositories_str = os.getenv("MAX_REPOSITORIES")
+        max_repositories = int(max_repositories_str) if max_repositories_str else None
+
         # Create indexing service if not already created
         if not self.indexer:
-            self.indexer = IndexingService(github_token=github_token)
+            self.indexer = IndexingService(
+                github_token=github_token,
+                enable_no_topic_search=enable_no_topic_search,
+                max_repositories=max_repositories,
+            )
 
         # Get database session
         db = self._get_db()
diff --git a/backend/tests/test_no_topic_search.py b/backend/tests/test_no_topic_search.py
new file mode 100644
index 0000000..469fe4d
--- /dev/null
+++ b/backend/tests/test_no_topic_search.py
@@ -0,0 +1,291 @@
+"""Tests for no-topic search functionality."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from app.services.github_service import GitHubService
+from app.services.indexer import IndexingService
+
+
+@pytest.mark.asyncio
+async def test_github_service_no_topic_search_disabled_by_default():
+    """Test that GitHubService defaults to topic-based search with no repository limit."""
+    service = GitHubService()
+    assert service.enable_no_topic_search is False
+    assert service.max_repositories is None
+
+
+@pytest.mark.asyncio
+async def test_github_service_no_topic_search_enabled():
+    """Test that GitHubService records enable_no_topic_search=True when it is requested."""
+    service = GitHubService(enable_no_topic_search=True)
+    assert service.enable_no_topic_search is True
+
+
+@pytest.mark.asyncio
+async def test_github_service_max_repositories():
+    """Test that GitHubService stores the max_repositories limit passed to its constructor."""
+    service = GitHubService(enable_no_topic_search=True, max_repositories=10)
+    assert service.max_repositories == 10
+
+
+@pytest.mark.asyncio
+async def test_search_repositories_with_no_topic_search():
+    """Test that search_repositories sends the automations.yaml query, not a topic query, in no-topic mode."""
+    service = GitHubService(enable_no_topic_search=True)
+
+    # Mocked search response: a single page holding one repository
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        "items": [
+            {
+                "name": "test-repo",
+                "owner": {"login": "testowner"},
+                "description": "Test repository",
+                "html_url": "https://github.com/testowner/test-repo",
+                "default_branch": "main",
+                "stargazers_count": 5,
+            }
+        ]
+    }
+
+    with patch("httpx.AsyncClient") as mock_client_class:
+        mock_client = AsyncMock()
+        mock_client.__aenter__.return_value = mock_client
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        repos = await service.search_repositories()
+
+        # Inspect the keyword arguments (call_args[1]) of the most recent GET request
+        call_args = mock_client.get.call_args
+        assert call_args is not None
+        params = call_args[1]["params"]
+        assert "automations.yaml in:path" in params["q"]
+        assert "topic:" not in params["q"]
+
+        # Verify the mocked repository was returned
+        assert len(repos) == 1
+        assert repos[0]["name"] == "test-repo"
+
+
+@pytest.mark.asyncio
+async def test_search_repositories_with_topic_search():
+    """Test that search_repositories uses topic query when no-topic search is disabled."""
+    service = GitHubService(enable_no_topic_search=False)
+
+    # Mocked search response: the same single-repo page is returned for every request
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        "items": [
+            {
+                "name": "test-repo",
+                "owner": {"login": "testowner"},
+                "description": "Test repository",
+                "html_url": "https://github.com/testowner/test-repo",
+                "default_branch": "main",
+                "stargazers_count": 5,
+            }
+        ]
+    }
+
+    with patch("httpx.AsyncClient") as mock_client_class:
+        mock_client = AsyncMock()
+        mock_client.__aenter__.return_value = mock_client
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        repos = await service.search_repositories()
+
+        # Verify the most recent request (the last topic searched) used a topic-based query
+        call_args = mock_client.get.call_args
+        assert call_args is not None
+        params = call_args[1]["params"]
+        assert "topic:" in params["q"]
+
+        # Every topic search returns the same repo, so de-duplication leaves one entry
+        assert len(repos) == 1
+        assert repos[0]["name"] == "test-repo"
+
+
+@pytest.mark.asyncio
+async def test_search_repositories_respects_max_repositories():
+    """Test that search_repositories stops after reaching max_repositories limit."""
+    service = GitHubService(enable_no_topic_search=True, max_repositories=2)
+
+    # A single mocked page holds 3 repos, one more than the limit of 2
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        "items": [
+            {
+                "name": f"test-repo-{i}",
+                "owner": {"login": "testowner"},
+                "description": f"Test repository {i}",
+                "html_url": f"https://github.com/testowner/test-repo-{i}",
+                "default_branch": "main",
+                "stargazers_count": 5,
+            }
+            for i in range(3)
+        ]
+    }
+
+    with patch("httpx.AsyncClient") as mock_client_class:
+        mock_client = AsyncMock()
+        mock_client.__aenter__.return_value = mock_client
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        repos = await service.search_repositories()
+
+        # Collection stops at the limit even though the page held a third repo
+        assert len(repos) == 2
+
+
+@pytest.mark.asyncio
+async def test_search_repositories_no_max_limit():
+    """Test that search_repositories returns all repos when max_repositories is None."""
+    service = GitHubService(enable_no_topic_search=True, max_repositories=None)
+
+    # A single mocked page with 3 repos (fewer than per_page, so pagination ends there)
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {
+        "items": [
+            {
+                "name": f"test-repo-{i}",
+                "owner": {"login": "testowner"},
+                "description": f"Test repository {i}",
+                "html_url": f"https://github.com/testowner/test-repo-{i}",
+                "default_branch": "main",
+                "stargazers_count": 5,
+            }
+            for i in range(3)
+        ]
+    }
+
+    with patch("httpx.AsyncClient") as mock_client_class:
+        mock_client = AsyncMock()
+        mock_client.__aenter__.return_value = mock_client
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        repos = await service.search_repositories()
+
+        # With no limit configured, every repo on the page is kept
+        assert len(repos) == 3
+
+
+@pytest.mark.asyncio
+async def test_indexing_service_passes_parameters():
+    """Test that IndexingService forwards its constructor arguments to its GitHubService."""
+    indexer = IndexingService(
+        github_token="test_token",
+        enable_no_topic_search=True,
+        max_repositories=10,
+    )
+
+    assert indexer.github_service.enable_no_topic_search is True
+    assert indexer.github_service.max_repositories == 10
+    assert indexer.github_service.token == "test_token"
+
+
+@pytest.mark.asyncio
+async def test_search_repositories_avoids_duplicates_in_no_topic_mode():
+    """Test that no-topic search doesn't return duplicate repositories."""
+    service = GitHubService(enable_no_topic_search=True)
+
+    repo = {
+        "name": "test-repo",
+        "owner": {"login": "testowner"},
+        "description": "Test repository",
+        "html_url": "https://github.com/testowner/test-repo",
+        "default_branch": "main",
+        "stargazers_count": 5,
+    }
+
+    def make_response(items):
+        """Build a mocked 200 search response carrying the given items."""
+        response = MagicMock()
+        response.status_code = 200
+        response.json.return_value = {"items": items}
+        return response
+
+    # With per_page=1 a full page keeps pagination going, so the second page
+    # (which repeats the same repo) is actually fetched; the empty third page
+    # stops the loop.
+    pages = [make_response([repo]), make_response([repo]), make_response([])]
+
+    with patch("httpx.AsyncClient") as mock_client_class:
+        mock_client = AsyncMock()
+        mock_client.__aenter__.return_value = mock_client
+        mock_client.get.side_effect = pages
+        mock_client_class.return_value = mock_client
+
+        repos = await service.search_repositories(per_page=1)
+
+        # All three pages were requested, but the repeated repo is kept once
+        assert mock_client.get.call_count == 3
+        assert len(repos) == 1
+        assert repos[0]["name"] == "test-repo"
+
+
+@pytest.mark.asyncio
+async def test_environment_variable_parsing(monkeypatch):
+    """Test that environment variables are correctly parsed for boolean values."""
+    import os
+
+    # monkeypatch restores the original environment even when an assertion
+    # fails, so this test cannot leak ENABLE_NO_TOPIC_SEARCH into other tests.
+    def parse_flag():
+        """Mirror the truthy-value parsing used by the application."""
+        return os.getenv("ENABLE_NO_TOPIC_SEARCH", "false").lower() in (
+            "true",
+            "1",
+            "yes",
+        )
+
+    # Test various truthy values
+    for value in ["true", "True", "TRUE", "1", "yes", "Yes"]:
+        monkeypatch.setenv("ENABLE_NO_TOPIC_SEARCH", value)
+        assert parse_flag() is True, f"Failed for value: {value}"
+
+    # Test various falsy values
+    for value in ["false", "False", "FALSE", "0", "no", "No", ""]:
+        monkeypatch.setenv("ENABLE_NO_TOPIC_SEARCH", value)
+        assert parse_flag() is False, f"Failed for value: {value}"
+
+    # An unset variable falls back to the "false" default
+    monkeypatch.delenv("ENABLE_NO_TOPIC_SEARCH", raising=False)
+    assert parse_flag() is False
+
+
+@pytest.mark.asyncio
+async def test_max_repositories_integer_parsing(monkeypatch):
+    """Test that MAX_REPOSITORIES environment variable is correctly parsed as integer."""
+    import os
+
+    # monkeypatch undoes every change afterwards, even when an assertion fails.
+    def parse_limit():
+        """Mirror the optional-integer parsing used by the application."""
+        max_repositories_str = os.getenv("MAX_REPOSITORIES")
+        return int(max_repositories_str) if max_repositories_str else None
+
+    # Test valid integer
+    monkeypatch.setenv("MAX_REPOSITORIES", "10")
+    assert parse_limit() == 10
+
+    # Test None when not set
+    monkeypatch.delenv("MAX_REPOSITORIES", raising=False)
+    assert parse_limit() is None
+
+    # Test empty string
+    monkeypatch.setenv("MAX_REPOSITORIES", "")
+    assert parse_limit() is None
+
+    # A non-numeric value is not silently ignored: int() raises ValueError
+    monkeypatch.setenv("MAX_REPOSITORIES", "ten")
+    with pytest.raises(ValueError):
+        parse_limit()
diff --git a/docker-compose.yml b/docker-compose.yml
index fb68627..d212953 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,6 +14,10 @@ services:
       # Optional: Add your GitHub token for higher API rate limits
       - GITHUB_TOKEN=${GITHUB_TOKEN:-}
       - ENVIRONMENT=development
+      # Optional: Enable no-topic search for testing (default: false)
+      - ENABLE_NO_TOPIC_SEARCH=${ENABLE_NO_TOPIC_SEARCH:-false}
+      # Optional: Limit maximum repositories for testing; only used with no-topic search (no limit if not set)
+      - MAX_REPOSITORIES=${MAX_REPOSITORIES:-}
     volumes:
       # Persist database across container restarts
       - api_data:/app/data

From ae6ed1a66d163ed96bc9ae25c47185fc2001941b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 17 Jan 2026 07:56:15 +0000
Subject: [PATCH 3/4] Update documentation for no-topic search feature

Co-authored-by: DevSecNinja <14926452+DevSecNinja@users.noreply.github.com>
---
 ARCHITECTURE.md |  6 +++++-
 README.md       | 12 ++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 9bdc9ea..5b9f1b6 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -44,7 +44,9 @@ SQLite Database ←→ GitHub API
 
 ### GitHub Service (`app/services/github_service.py`)
 
-- Search repositories by topic via GitHub API
+- Search repositories by topic via GitHub API (default: `hadiscover` or `ha-discover`)
+- Optional no-topic search mode for testing/development (`ENABLE_NO_TOPIC_SEARCH`)
+- Configurable repository limit for testing (`MAX_REPOSITORIES`)
 - Fetch file contents from repos
 - Discover automation files using path patterns
 - Optional GitHub token for higher rate limits (5k/hr vs 60/hr)
@@ -140,6 +142,8 @@ Fault-tolerant YAML parsing handles varied Home Assistant configurations. Partia
 
 Only indexes repositories with explicit `hadiscover` topic. Respects privacy and user consent.
 
+**Testing Mode**: For development and testing, the indexer can be configured to search without the topic requirement by setting the `ENABLE_NO_TOPIC_SEARCH=true` environment variable. In that mode, a `MAX_REPOSITORIES` limit can also be set to cap the number of repositories returned.
+
 ### Hourly Scheduled Indexing
 
 APScheduler runs automatic indexing every hour. Manual trigger available in development mode only.
diff --git a/README.md b/README.md
index 5188a65..adfe427 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,18 @@ cd backend && source venv/bin/activate && pytest tests/ -v
 
 CI automatically tests Docker containers, API endpoints, and integration on every PR.
 
+#### Testing Configuration (Optional)
+
+For testing and development purposes, you can enable no-topic search to find more repositories:
+
+```bash
+# In backend/.env or as environment variables
+ENABLE_NO_TOPIC_SEARCH=true    # Search without requiring hadiscover topic
+MAX_REPOSITORIES=10             # Limit results for testing (optional)
+```
+
+**Warning**: Keep `ENABLE_NO_TOPIC_SEARCH=false` in production to maintain the opt-in privacy model.
+
 ### API Documentation
 
 OpenAPI/Swagger docs available at <http://localhost:8000/docs> once running.

From d1c49070a4de09408d279a951c28a7ea8d7f862b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 17 Jan 2026 07:57:34 +0000
Subject: [PATCH 4/4] Fix code review feedback: improve pagination loop control

Co-authored-by: DevSecNinja <14926452+DevSecNinja@users.noreply.github.com>
---
 backend/app/services/github_service.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/backend/app/services/github_service.py b/backend/app/services/github_service.py
index 43479d6..80ec2eb 100644
--- a/backend/app/services/github_service.py
+++ b/backend/app/services/github_service.py
@@ -98,6 +98,7 @@ async def search_repositories(self, per_page: int = 100) -> List[Dict]:
                 # Search for repositories with automation files (no topic requirement)
                 # Use a broad search for Home Assistant automation files
                 page = 1
+                reached_limit = False
                 while True:
                     try:
                         url = f"{self.BASE_URL}/search/repositories"
@@ -150,13 +151,11 @@ async def search_repositories(self, per_page: int = 100) -> List[Dict]:
                                 logger.info(
                                     f"Reached max repository limit: {self.max_repositories}"
                                 )
+                                reached_limit = True
                                 break
 
-                        # Check if we've reached the maximum or there are no more pages
-                        if (
-                            self.max_repositories is not None
-                            and len(all_repositories) >= self.max_repositories
-                        ) or len(items) < per_page:
+                        # Stop pagination if we've reached the limit or there are no more pages
+                        if reached_limit or len(items) < per_page:
                             break
 
                         page += 1