Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
a794a47
feat: implement Discovery Playground with Redis caching and comprehen…
SimplicityGuy Jul 19, 2025
1a7ee12
test: improve test coverage with comprehensive cache and health serve…
SimplicityGuy Jul 19, 2025
dc1c779
fix: resolve mypy type annotation errors in discovery tests
SimplicityGuy Jul 19, 2025
e1d8cf6
fix: update docker-validate workflow to include redis service
SimplicityGuy Jul 19, 2025
18e5357
fix: remove iOS-specific wheels from uv.lock to fix Docker builds
SimplicityGuy Jul 19, 2025
be4efc4
feat: add comprehensive iOS wheel prevention measures
SimplicityGuy Jul 19, 2025
eff6e0e
chore: add discovery.
SimplicityGuy Jul 20, 2025
b479c36
Merge branch 'main' of github.com:SimplicityGuy/discogsography into i…
SimplicityGuy Jul 20, 2025
f8cf3e8
Merge branch 'main' of github.com:SimplicityGuy/discogsography into i…
SimplicityGuy Jul 25, 2025
a48f523
fix: fixed uv.lock file.
SimplicityGuy Jul 25, 2025
be4016b
chore: update python packages
SimplicityGuy Jul 25, 2025
fac76b9
Merge branch 'main' into improve-discovery
SimplicityGuy Jul 26, 2025
8f8fa65
Merge branch 'main' into improve-discovery
SimplicityGuy Jan 2, 2026
ca2034b
chore: remove redundant iOS wheels prevention infrastructure
SimplicityGuy Jan 2, 2026
0ec68a9
feat: add comprehensive testing, security, and observability to disco…
SimplicityGuy Jan 2, 2026
b592440
feat: add comprehensive Locust load testing suite (Phase 2.10)
SimplicityGuy Jan 4, 2026
9f864a5
docs: add comprehensive performance benchmarking guide (Phase 2.11)
SimplicityGuy Jan 4, 2026
84b5b6b
fix: suppress harmless Neo4j warnings in Discovery service logs
SimplicityGuy Jan 4, 2026
0741bbb
feat: implement two-level cache with automatic warming (Phase 2.4, 2.…
SimplicityGuy Jan 4, 2026
20f47c3
feat: add database connection pool metrics and monitoring (Phase 2.7)
SimplicityGuy Jan 4, 2026
514ff88
feat: add Neo4j indexing system for optimal query performance (Phase …
SimplicityGuy Jan 4, 2026
2dea587
feat: implement cursor-based pagination for all endpoints (Phase 2.9)
SimplicityGuy Jan 4, 2026
43c0086
feat: add Prometheus metrics and middleware instrumentation (Phase 2.3)
SimplicityGuy Jan 4, 2026
e5de237
fix: correct heatmap type validation in playground API (Phase 2.2)
SimplicityGuy Jan 4, 2026
42c3e1d
fix: resolve type errors in discovery service initialization
SimplicityGuy Jan 4, 2026
c1b31ae
feat: add structured logging tests and update test configuration (Pha…
SimplicityGuy Jan 4, 2026
98d1072
feat: implement Phase 3 Discovery service improvements
SimplicityGuy Jan 5, 2026
72e517b
feat: implement Phase 4.1.1 - ML API infrastructure (placeholder mode)
SimplicityGuy Jan 5, 2026
650f8c6
feat(discovery): implement Phase 4.1.2-4.1.4 API endpoints
SimplicityGuy Jan 5, 2026
67d94ff
feat(discovery): enhance OpenAPI documentation (Phase 4.1.5)
SimplicityGuy Jan 5, 2026
8a8c825
test(discovery): add E2E tests for Phase 4 APIs (4.2.1-4.2.3)
SimplicityGuy Jan 5, 2026
b73ebd4
docs(discovery): add Phase 4 completion summary
SimplicityGuy Jan 5, 2026
e8f916d
feat(discovery): integrate Phase 3 ML components into API (Phase 4.2)
SimplicityGuy Jan 5, 2026
ba463fd
feat(discovery): integrate Phase 3 Search components into API (Phase …
SimplicityGuy Jan 5, 2026
da9b28a
docs(discovery): update Phase 4 progress - Search API complete
SimplicityGuy Jan 5, 2026
8cf2b30
feat(discovery): integrate Phase 3 Graph Analytics components (Phase …
SimplicityGuy Jan 5, 2026
59a18ed
docs(discovery): update Phase 4 progress - Graph Analytics complete
SimplicityGuy Jan 5, 2026
d30e57c
feat(discovery): implement Real-Time API with WebSocket support
SimplicityGuy Jan 5, 2026
b42e9b1
docs(discovery): update progress tracker - Phase 4.2 complete
SimplicityGuy Jan 5, 2026
be6621c
feat: implement Phase 4.3 UI - Discovery API dashboard visualizations
SimplicityGuy Jan 5, 2026
bdeda6a
docs: update PHASE_4_PROGRESS.md with Phase 4.3 completion
SimplicityGuy Jan 5, 2026
da40571
Merge branch 'improve-discovery' of github.com:SimplicityGuy/discogso…
SimplicityGuy Jan 5, 2026
c0c82ec
fix: ruff fixes
SimplicityGuy Jan 5, 2026
8a19925
fix(tests): fix discovery test fixture mocking and assertions
SimplicityGuy Jan 5, 2026
366377b
fix: add missing discovery dependencies to root pyproject.toml
SimplicityGuy Jan 5, 2026
2ed00bf
fix(ci): add statuses:write permission to test workflow
SimplicityGuy Jan 5, 2026
dbe5d1d
test(coverage): improve core component test coverage to 73%
SimplicityGuy Jan 6, 2026
8af2595
test(discovery): add comprehensive tests for 8 discovery modules
SimplicityGuy Jan 6, 2026
a7a7eed
fix(tests): resolve all ruff linting violations to fix CI workflow
SimplicityGuy Jan 7, 2026
ef2a662
fix(tests): resolve mock parameter mismatches causing CI failures
SimplicityGuy Jan 7, 2026
c94c3bd
fix(tests): resolve queue error handling test failure
SimplicityGuy Jan 7, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/docker-validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
- name: 🔍 Check docker-compose services
run: |
services=$(docker-compose config --services | sort | tr "\n" " " | sed "s/ $//")
expected="dashboard discovery extractor graphinator neo4j postgres rabbitmq tableinator"
expected="dashboard discovery extractor graphinator neo4j postgres rabbitmq redis tableinator"

if [ "$services" != "$expected" ]; then
echo "❌ Service mismatch!"
Expand All @@ -104,14 +104,14 @@ jobs:
run: |
# Check that services have correct dependencies
deps=$(docker-compose config | yq eval '.services.dashboard.depends_on | keys | sort | join(" ")' -)
if [ "$deps" != "neo4j postgres rabbitmq" ]; then
echo "❌ Dashboard should depend on neo4j, postgres, and rabbitmq"
if [ "$deps" != "neo4j postgres rabbitmq redis" ]; then
echo "❌ Dashboard should depend on neo4j, postgres, rabbitmq, and redis"
exit 1
fi

deps=$(docker-compose config | yq eval '.services.discovery.depends_on | keys | sort | join(" ")' -)
if [ "$deps" != "neo4j postgres rabbitmq" ]; then
echo "❌ Discovery should depend on neo4j, postgres, and rabbitmq"
if [ "$deps" != "neo4j postgres rabbitmq redis" ]; then
echo "❌ Discovery should depend on neo4j, postgres, rabbitmq, and redis"
exit 1
fi

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ env:
permissions:
contents: read
pull-requests: write # Required for coverage report comments
statuses: write # Required for coverage status creation

jobs:
python-test:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ repos:
- id: shfmt
args: ["-i", "2", "-ci", "-bn", "-sr"] # 2 space indent, indent case, binary next line, redirect operators

# Rust checks
# Local hooks
- repo: local
hooks:
- id: cargo-fmt
Expand Down
52 changes: 51 additions & 1 deletion common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import logging
import sys
import warnings
from dataclasses import dataclass
from os import getenv
from pathlib import Path
Expand Down Expand Up @@ -165,7 +166,14 @@ def setup_logging(
level: str | None = "INFO",
log_file: Path | None = None,
) -> None:
"""Set up structured logging configuration to match Rust extractor format."""
"""Set up structured logging configuration with correlation IDs and service context.

This configures structlog to:
- Include correlation IDs from contextvars in all log entries
- Add service-specific context (name, version, environment)
- Output structured JSON logs to console and optionally to file
- Support distributed tracing via request IDs
"""

# Default to INFO if level is None
if level is None:
Expand All @@ -175,6 +183,8 @@ def setup_logging(
timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)

shared_processors = [
# Merge contextvars (correlation IDs, request context) into log entries
structlog.contextvars.merge_contextvars,
structlog.stdlib.add_log_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.PositionalArgumentsFormatter(),
Expand All @@ -198,6 +208,12 @@ def setup_logging(
cache_logger_on_first_use=True,
)

# Bind service-specific context that will be included in all log entries
structlog.contextvars.bind_contextvars(
service=service_name,
environment=getenv("ENVIRONMENT", "development"),
)

# Set up standard logging handlers
handlers: list[logging.Handler] = []

Expand Down Expand Up @@ -246,6 +262,16 @@ def setup_logging(
logging.getLogger("pika.adapters.blocking_connection").setLevel(logging.WARNING)
logging.getLogger("pika.connection").setLevel(logging.WARNING)

# Suppress Neo4j schema warnings (unknown labels/relationships)
# These warnings appear when database is empty or being populated
# and don't indicate actual errors in the code
logging.getLogger("neo4j.notifications").setLevel(logging.ERROR)
logging.getLogger("neo4j").setLevel(logging.ERROR) # Suppress all Neo4j warnings, keep errors

# Suppress Neo4j Python warnings about single record results
# This is expected behavior when using OPTIONAL MATCH patterns
warnings.filterwarnings("ignore", message="Expected a result with a single record", category=UserWarning, module="neo4j")

# Get structured logger
log = structlog.get_logger()
log.info("✅ Logging configured for service", service=service_name)
Expand All @@ -265,6 +291,10 @@ class DashboardConfig:
postgres_database: str
rabbitmq_management_user: str
rabbitmq_management_password: str
redis_url: str = "redis://localhost:6379/0"
cors_origins: list[str] | None = None # None = default to localhost only
cache_warming_enabled: bool = True # Enable cache warming on service startup
cache_webhook_secret: str | None = None # Secret for cache invalidation webhooks

@classmethod
def from_env(cls) -> "DashboardConfig":
Expand All @@ -273,10 +303,26 @@ def from_env(cls) -> "DashboardConfig":
graphinator_config = GraphinatorConfig.from_env()
tableinator_config = TableinatorConfig.from_env()

# Redis configuration
redis_url = getenv("REDIS_URL", "redis://localhost:6379/0")

# Get RabbitMQ management credentials
rabbitmq_management_user = getenv("RABBITMQ_MANAGEMENT_USER", "discogsography")
rabbitmq_management_password = getenv("RABBITMQ_MANAGEMENT_PASSWORD", "discogsography")

# CORS configuration
cors_origins_env = getenv("CORS_ORIGINS")
cors_origins = None
if cors_origins_env:
# Parse comma-separated list of origins
cors_origins = [origin.strip() for origin in cors_origins_env.split(",") if origin.strip()]

# Cache warming configuration
cache_warming_enabled = getenv("CACHE_WARMING_ENABLED", "true").lower() in ("true", "1", "yes")

# Cache invalidation webhook configuration
cache_webhook_secret = getenv("CACHE_WEBHOOK_SECRET")

return cls(
amqp_connection=graphinator_config.amqp_connection,
neo4j_address=graphinator_config.neo4j_address,
Expand All @@ -286,8 +332,12 @@ def from_env(cls) -> "DashboardConfig":
postgres_username=tableinator_config.postgres_username,
postgres_password=tableinator_config.postgres_password,
postgres_database=tableinator_config.postgres_database,
redis_url=redis_url,
rabbitmq_management_user=rabbitmq_management_user,
rabbitmq_management_password=rabbitmq_management_password,
cors_origins=cors_origins,
cache_warming_enabled=cache_warming_enabled,
cache_webhook_secret=cache_webhook_secret,
)


Expand Down
152 changes: 152 additions & 0 deletions dashboard/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,158 @@ async def prometheus_metrics() -> Response:
return Response(content=generate_latest(), media_type="text/plain")


# Discovery API Proxy Endpoints for Phase 4.3 UI


@app.get("/api/discovery/ml/status") # type: ignore[untyped-decorator]
async def get_ml_status() -> ORJSONResponse:
"""Get ML API status from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/ml/status", method="GET").inc()
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get("http://discovery:8005/api/ml/status")
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching ML status: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.post("/api/discovery/ml/recommend/collaborative") # type: ignore[untyped-decorator]
async def get_collaborative_recommendations(artist_id: str = "The Beatles", limit: int = 10, min_similarity: float = 0.1) -> ORJSONResponse:
"""Get collaborative filtering recommendations from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/ml/recommend/collaborative", method="POST").inc()
try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(
"http://discovery:8005/api/ml/recommend/collaborative",
json={"artist_id": artist_id, "limit": limit, "min_similarity": min_similarity},
)
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching collaborative recommendations: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.post("/api/discovery/ml/recommend/hybrid") # type: ignore[untyped-decorator]
async def get_hybrid_recommendations(artist_name: str = "The Beatles", limit: int = 10, strategy: str = "weighted") -> ORJSONResponse:
"""Get hybrid recommendations from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/ml/recommend/hybrid", method="POST").inc()
try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(
"http://discovery:8005/api/ml/recommend/hybrid",
json={"artist_name": artist_name, "limit": limit, "strategy": strategy},
)
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching hybrid recommendations: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.get("/api/discovery/search/status") # type: ignore[untyped-decorator]
async def get_search_status() -> ORJSONResponse:
"""Get Search API status from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/search/status", method="GET").inc()
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get("http://discovery:8005/api/search/status")
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching search status: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.get("/api/discovery/search/stats") # type: ignore[untyped-decorator]
async def get_search_stats() -> ORJSONResponse:
"""Get search statistics from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/search/stats", method="GET").inc()
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get("http://discovery:8005/api/search/stats")
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching search stats: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.get("/api/discovery/graph/status") # type: ignore[untyped-decorator]
async def get_graph_status() -> ORJSONResponse:
"""Get Graph Analytics API status from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/graph/status", method="GET").inc()
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get("http://discovery:8005/api/graph/status")
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching graph status: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.post("/api/discovery/graph/centrality") # type: ignore[untyped-decorator]
async def get_centrality_metrics(
metric: str = "pagerank", limit: int = 20, node_type: str = "artist", sample_size: int | None = None
) -> ORJSONResponse:
"""Get centrality metrics from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/graph/centrality", method="POST").inc()
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
"http://discovery:8005/api/graph/centrality",
json={"metric": metric, "limit": limit, "node_type": node_type, "sample_size": sample_size},
)
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching centrality metrics: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.get("/api/discovery/realtime/status") # type: ignore[untyped-decorator]
async def get_realtime_status() -> ORJSONResponse:
"""Get Real-Time API status from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/realtime/status", method="GET").inc()
try:
async with httpx.AsyncClient(timeout=5.0) as client:
response = await client.get("http://discovery:8005/api/realtime/status")
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching realtime status: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.post("/api/discovery/realtime/trending") # type: ignore[untyped-decorator]
async def get_trending(category: str = "artists", limit: int = 10, time_window: str = "day") -> ORJSONResponse:
"""Get trending items from Discovery service."""
API_REQUESTS.labels(endpoint="/api/discovery/realtime/trending", method="POST").inc()
try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.post(
"http://discovery:8005/api/realtime/trending",
json={"category": category, "limit": limit, "time_window": time_window},
)
if response.status_code == 200:
return ORJSONResponse(content=response.json())
return ORJSONResponse(content={"error": f"Discovery API returned {response.status_code}"}, status_code=response.status_code)
except Exception as e:
logger.error(f"❌ Error fetching trending items: {e}")
return ORJSONResponse(content={"error": str(e)}, status_code=503)


@app.websocket("/ws") # type: ignore[untyped-decorator]
async def websocket_endpoint(websocket: WebSocket) -> None:
"""WebSocket endpoint for real-time updates."""
Expand Down
Loading