diff --git a/deploy/helm/tracebility/templates/api-deployment.yaml b/deploy/helm/tracebility/templates/api-deployment.yaml index b339d94..17c7961 100644 --- a/deploy/helm/tracebility/templates/api-deployment.yaml +++ b/deploy/helm/tracebility/templates/api-deployment.yaml @@ -49,6 +49,7 @@ spec: value: {{ .Values.logging.level | quote }} {{- include "tracebility.envFromSecret" (dict "cfg" .Values.postgres "name" "TRACEBILITY_PG_DSN") | nindent 12 }} {{- include "tracebility.envFromSecret" (dict "cfg" .Values.clickhouse "name" "TRACEBILITY_CLICKHOUSE_URL") | nindent 12 }} + {{- include "tracebility.envFromSecret" (dict "cfg" .Values.redis "name" "TRACEBILITY_REDIS_URL") | nindent 12 }} {{- include "tracebility.envFromSecret" (dict "cfg" .Values.session "name" "TRACEBILITY_SESSION_SECRET") | nindent 12 }} {{- if .Values.oauth.existingSecret }} - name: OAUTH_GOOGLE_CLIENT_ID diff --git a/deploy/helm/tracebility/templates/ingest-api-deployment.yaml b/deploy/helm/tracebility/templates/ingest-api-deployment.yaml index 5f8f3e1..4de8f92 100644 --- a/deploy/helm/tracebility/templates/ingest-api-deployment.yaml +++ b/deploy/helm/tracebility/templates/ingest-api-deployment.yaml @@ -69,6 +69,7 @@ spec: value: /var/lib/tracebility/ingest-buffer {{- include "tracebility.envFromSecret" (dict "cfg" .Values.postgres "name" "TRACEBILITY_PG_DSN") | nindent 12 }} {{- include "tracebility.envFromSecret" (dict "cfg" .Values.redis "name" "TRACEBILITY_REDIS_URL") | nindent 12 }} + {{- include "tracebility.envFromSecret" (dict "cfg" .Values.clickhouse "name" "TRACEBILITY_CLICKHOUSE_URL") | nindent 12 }} {{- if .Values.ingestApi.diskBuffer.enabled }} volumeMounts: - name: ingest-buffer diff --git a/services/_shared/tenant/tracebility_tenant/audit.py b/services/_shared/tenant/tracebility_tenant/audit.py index 981235e..d675037 100644 --- a/services/_shared/tenant/tracebility_tenant/audit.py +++ b/services/_shared/tenant/tracebility_tenant/audit.py @@ -86,17 +86,16 @@ async def from_url( cls, url: str, *, - username: str | None = None, - password: str | None = None, - database: str = "default", table: str = "audit_log", ) -> AuditWriter: - client = await clickhouse_connect.get_async_client( - dsn=url, - username=username, - password=password, - database=database, - ) + # Pass the full DSN through. Credentials and database are encoded + # in the URL (http://user:pass@host:port/db). Earlier versions + # accepted username/password/database kwargs, but that turned out + # to override the DSN's embedded credentials when callers passed + # default values — yielding cryptic AUTHENTICATION_FAILED errors + # in deployed environments where the helm chart only ships the + # DSN as a single secret. + client = await clickhouse_connect.get_async_client(dsn=url) return cls(client, table=table) async def write(self, event: AuditEvent) -> None: diff --git a/services/_shared/tenant/tracebility_tenant/reconciler_audit.py b/services/_shared/tenant/tracebility_tenant/reconciler_audit.py index c2352d1..b908470 100644 --- a/services/_shared/tenant/tracebility_tenant/reconciler_audit.py +++ b/services/_shared/tenant/tracebility_tenant/reconciler_audit.py @@ -192,17 +192,11 @@ async def reconciler_loop( *, pg: asyncpg.Pool, clickhouse_url: str, - clickhouse_user: str = "default", - clickhouse_password: str = "", - clickhouse_database: str = "default", interval_s: float = 24 * 60 * 60.0, # daily ) -> None: - ch = await clickhouse_connect.get_async_client( - dsn=clickhouse_url, - username=clickhouse_user, - password=clickhouse_password, - database=clickhouse_database, - ) + """``clickhouse_url`` is a full DSN with embedded credentials. See + reconciler_quota.reconciler_loop docstring for why we don't split.""" + ch = await clickhouse_connect.get_async_client(dsn=clickhouse_url) try: while True: try: diff --git a/services/_shared/tenant/tracebility_tenant/reconciler_quota.py b/services/_shared/tenant/tracebility_tenant/reconciler_quota.py index 9c705c9..6f7e766 100644 --- a/services/_shared/tenant/tracebility_tenant/reconciler_quota.py +++ b/services/_shared/tenant/tracebility_tenant/reconciler_quota.py @@ -165,18 +165,16 @@ async def reconciler_loop( pg: asyncpg.Pool, redis: redis_async.Redis, clickhouse_url: str, - clickhouse_user: str = "default", - clickhouse_password: str = "", - clickhouse_database: str = "default", interval_s: float = 60.0, ) -> None: - """Run forever. Operator service spawns this as a background task.""" - ch = await clickhouse_connect.get_async_client( - dsn=clickhouse_url, - username=clickhouse_user, - password=clickhouse_password, - database=clickhouse_database, - ) + """Run forever. Operator service spawns this as a background task. + + ``clickhouse_url`` is a full DSN with embedded credentials and database + (http(s)://user:pass@host:port/db). The helm chart ships it as a single + secret value; splitting credentials into separate kwargs caused + AUTHENTICATION_FAILED in production. + """ + ch = await clickhouse_connect.get_async_client(dsn=clickhouse_url) try: while True: try: diff --git a/services/api/tracebility_api/app.py b/services/api/tracebility_api/app.py index da1bdfb..9ddbc9e 100644 --- a/services/api/tracebility_api/app.py +++ b/services/api/tracebility_api/app.py @@ -92,14 +92,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: # ``audit_log`` is read-only after this lands; new writes go to # ClickHouse via this writer (spec §5.8). app.state.audit_writer = ( - await AuditWriter.from_url( - settings.clickhouse_url, - username=settings.clickhouse_user, - password=settings.clickhouse_password, - database=settings.clickhouse_database, - ) - if settings.clickhouse_url - else None + await AuditWriter.from_url(settings.clickhouse_url) if settings.clickhouse_url else None ) # Redis is needed for the api-key invalidation publish path and the # quota / audit reconciler hooks. Optional; if unset, those features diff --git a/services/api/tracebility_api/config.py b/services/api/tracebility_api/config.py index 1a46809..f6ada77 100644 --- a/services/api/tracebility_api/config.py +++ b/services/api/tracebility_api/config.py @@ -16,9 +16,6 @@ class Settings: postgres_dsn: str session_secret: str clickhouse_url: str | None = None - clickhouse_user: str = "default" - clickhouse_password: str = "" - clickhouse_database: str = "default" redis_url: str | None = None session_cookie_name: str = "tracebility_session" session_max_age_seconds: int = 60 * 60 * 24 * 7 @@ -58,9 +55,6 @@ def load() -> Settings: postgres_dsn=postgres_dsn, session_secret=session_secret, clickhouse_url=os.environ.get("TRACEBILITY_CLICKHOUSE_URL"), - clickhouse_user=os.environ.get("TRACEBILITY_CLICKHOUSE_USER", "default"), - clickhouse_password=os.environ.get("TRACEBILITY_CLICKHOUSE_PASSWORD", ""), - clickhouse_database=os.environ.get("TRACEBILITY_CLICKHOUSE_DATABASE", "default"), redis_url=os.environ.get("TRACEBILITY_REDIS_URL"), session_cookie_name=os.environ.get("TRACEBILITY_SESSION_COOKIE", "tracebility_session"), session_max_age_seconds=int( diff --git a/services/ingest-api/tracebility_ingest/app.py b/services/ingest-api/tracebility_ingest/app.py index 1cef8c6..487ade4 100644 --- a/services/ingest-api/tracebility_ingest/app.py +++ b/services/ingest-api/tracebility_ingest/app.py @@ -85,12 +85,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: await app.state.resolver.start_invalidator() app.state.rate_limiter = RateLimiter(app.state.redis) app.state.quota_meter = QuotaMeter(app.state.redis) - app.state.audit_writer = await AuditWriter.from_url( - settings.clickhouse_url, - username=settings.clickhouse_user, - password=settings.clickhouse_password, - database=settings.clickhouse_database, - ) + app.state.audit_writer = await AuditWriter.from_url(settings.clickhouse_url) app.state.redactor = redactor_from_env(settings.redact_pii) drain_task = asyncio.create_task(_drain_loop(app.state.enqueue)) log.info( diff --git a/services/ingest-api/tracebility_ingest/config.py b/services/ingest-api/tracebility_ingest/config.py index 7896428..07deb36 100644 --- a/services/ingest-api/tracebility_ingest/config.py +++ b/services/ingest-api/tracebility_ingest/config.py @@ -12,11 +12,11 @@ class Settings: redis_url: str postgres_dsn: str - # ClickHouse used by AuditWriter (egress + quota.block events) + # Full DSN — http(s)://user:pass@host:port/db. Credentials and + # database name are embedded; the helm chart ships this as a single + # secret value (see deploy/helm/.../templates/ingest-api-deployment.yaml + # and the tracebility-clickhouse secret created by the bootstrap runbook). clickhouse_url: str - clickhouse_user: str - clickhouse_password: str - clickhouse_database: str = "default" # spans larger than this are spilled to object storage (per ER-06) inline_blob_max_bytes: int = 1_000_000 # disk buffer for ingest enqueue when redis is unavailable (ER-01) @@ -46,9 +46,6 @@ def load() -> Settings: redis_url=redis_url, postgres_dsn=postgres_dsn, clickhouse_url=clickhouse_url, - clickhouse_user=os.environ.get("TRACEBILITY_CLICKHOUSE_USER", "default"), - clickhouse_password=os.environ.get("TRACEBILITY_CLICKHOUSE_PASSWORD", ""), - clickhouse_database=os.environ.get("TRACEBILITY_CLICKHOUSE_DATABASE", "default"), inline_blob_max_bytes=int(os.environ.get("TRACEBILITY_INLINE_BLOB_MAX_BYTES", "1000000")), disk_buffer_path=os.environ.get( "TRACEBILITY_DISK_BUFFER_PATH", "/var/lib/tracebility/ingest-buffer"