-
Notifications
You must be signed in to change notification settings - Fork 9
feat: write event data to clickhouse #438
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
b4a2180
fix: add missing backend-resources-local.yaml
iand bcf1978
feat: add clickhouse to local kind cluster for development
iand 2beed2e
feat: add initial clickhouse module
iand 70c6cb2
feat: perform clickhouse migration on app startup
iand 32e649a
feat: wire up writes to cliickhouse
iand 429282c
chore: rename deal_checks table to data_storage_checks
iand 98230f4
chore: remove network column from clickhouse schemas
iand 2255473
chore: move probeLocation config to app config
iand ccbfceb
chore: rename ttfb_ms to first_byte_ms in clickhouse schema and add s…
iand 731dfc7
feat: add sp_id to clickhouse schema
iand 497ca04
chore: stylistic changes for guarding against undefined variables
iand ff0e6fc
chore: move clickhouse config into app config for consistency
iand 4871d7f
chore: log errors with clickhouse migration in module startup
iand 0534a17
chore: expand documentation to include clickhouse
iand addbd2e
chore: address review comments
iand ed0fc29
merge upstream/main
iand 00cd978
chore: fix import order in worker module and add clickhouse mock
iand 240c799
chore: drop retry_count from ClickHouse tables
iand 19f9658
feat: expand data_retention_challenges schema
iand File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| import { Global, Module } from "@nestjs/common"; | ||
| import { makeCounterProvider, makeGaugeProvider, makeHistogramProvider } from "@willsoto/nestjs-prometheus"; | ||
| import { ClickhouseService } from "./clickhouse.service.js"; | ||
|
|
||
| @Global() | ||
| @Module({ | ||
| providers: [ | ||
| makeHistogramProvider({ | ||
| name: "clickhouseFlushDurationSeconds", | ||
| help: "Round-trip time of each ClickHouse flush call in seconds", | ||
| buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5], | ||
| }), | ||
| makeCounterProvider({ | ||
| name: "clickhouseFlushErrorsTotal", | ||
| help: "Number of failed ClickHouse flush attempts; non-zero means rows were dropped", | ||
| }), | ||
| makeCounterProvider({ | ||
| name: "clickhouseDroppedRowsTotal", | ||
| help: "Rows silently dropped due to flush failure or buffer overflow, by reason", | ||
| labelNames: ["reason"] as const, | ||
| }), | ||
| makeGaugeProvider({ | ||
| name: "clickhouseBufferRows", | ||
| help: "Current number of rows queued in the ClickHouse buffer", | ||
| }), | ||
| makeCounterProvider({ | ||
| name: "clickhouseRowsInsertedTotal", | ||
| help: "Rows successfully written to ClickHouse, by table", | ||
| labelNames: ["table"] as const, | ||
| }), | ||
| ClickhouseService, | ||
| ], | ||
| exports: [ClickhouseService], | ||
| }) | ||
| export class ClickhouseModule {} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,82 @@ | ||
| /** | ||
| * ClickHouse DDL statements executed on startup via CREATE DATABASE/TABLE IF NOT EXISTS. | ||
| * Order matters: database must be created before tables. | ||
| */ | ||
| export function buildMigrations(database: string): string[] { | ||
| return [ | ||
|
iand marked this conversation as resolved.
|
||
| `CREATE TABLE IF NOT EXISTS ${database}.data_storage_checks | ||
| ( | ||
| timestamp DateTime64(3, 'UTC'), -- when deal entity was saved | ||
|
|
||
| probe_location LowCardinality(String), -- dealbot location | ||
| sp_address String, -- storage provider address | ||
| sp_id Nullable(UInt64), -- storage provider numeric id | ||
| sp_name Nullable(String), -- storage provider name | ||
|
|
||
| deal_id UUID, -- id assigned by dealbot | ||
| piece_cid Nullable(String), -- null if upload failed | ||
| piece_id Nullable(UInt64), -- on-chain piece id | ||
| file_size_bytes Nullable(UInt64), -- raw file size before CAR encoding | ||
| piece_size_bytes Nullable(UInt64), -- piece size after CAR encoding | ||
|
|
||
| status LowCardinality(String), -- DealStatus: 'pending' | 'uploaded' | 'piece_added' | 'piece_confirmed' | 'deal_created' | 'failed' | ||
| error_code LowCardinality(Nullable(String)), | ||
|
|
||
| upload_started_at Nullable(DateTime64(3, 'UTC')), -- when executeUpload() was called | ||
| upload_ended_at Nullable(DateTime64(3, 'UTC')), -- when onStored event fired | ||
|
|
||
| pieces_added_at Nullable(DateTime64(3, 'UTC')), -- when onPiecesAdded event fired | ||
| pieces_confirmed_at Nullable(DateTime64(3, 'UTC')), -- when onPiecesConfirmed event fired | ||
|
|
||
| ipni_status LowCardinality(Nullable(String)), -- 'pending' | 'sp_indexed' | 'sp_advertised' | 'verified' | 'failed' | ||
| ipni_indexed_at Nullable(DateTime64(3, 'UTC')), -- when dealbot first observed SP_INDEXED (accuracy limited to poll interval) | ||
| ipni_advertised_at Nullable(DateTime64(3, 'UTC')), -- when dealbot first observed SP_ADVERTISED (accuracy limited to poll interval) | ||
| ipni_verified_at Nullable(DateTime64(3, 'UTC')), -- when dealbot confirmed root CID findable via IPNI | ||
| ipni_verified_cids_count Nullable(UInt32), -- CIDs confirmed findable via IPNI | ||
| ipni_unverified_cids_count Nullable(UInt32) -- CIDs checked but not findable | ||
| ) ENGINE MergeTree() | ||
| PRIMARY KEY (probe_location, sp_address, timestamp) | ||
| PARTITION BY toStartOfMonth(timestamp) | ||
| TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, | ||
|
|
||
| `CREATE TABLE IF NOT EXISTS ${database}.retrieval_checks | ||
| ( | ||
| timestamp DateTime64(3, 'UTC'), -- when retrieval entity was saved | ||
| probe_location LowCardinality(String), -- dealbot location | ||
| sp_address String, -- storage provider address | ||
| sp_id Nullable(UInt64), -- storage provider numeric id | ||
| sp_name Nullable(String), -- storage provider name | ||
|
|
||
| deal_id Nullable(UUID), -- id of deal assigned by dealbot | ||
| retrieval_id UUID, -- id of retrieval assigned by dealbot | ||
| service_type LowCardinality(String), -- 'direct_sp' | 'ipfs_pin' | ||
|
|
||
| status LowCardinality(String), -- RetrievalStatus: 'pending' | 'in_progress' | 'success' | 'failed' | 'timeout' | ||
| http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure | ||
|
|
||
| first_byte_ms Nullable(Float64), -- time from request start to first response byte | ||
| last_byte_ms Nullable(Float64), -- time from request start to last response byte | ||
| bytes_retrieved Nullable(UInt64) -- size of received data in bytes | ||
| ) ENGINE MergeTree() | ||
| PRIMARY KEY (probe_location, sp_address, timestamp) | ||
| PARTITION BY toStartOfMonth(timestamp) | ||
| TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, | ||
|
|
||
| `CREATE TABLE IF NOT EXISTS ${database}.data_retention_challenges | ||
| ( | ||
| timestamp DateTime64(3, 'UTC'), -- when the poll ran and detected these periods | ||
| probe_location LowCardinality(String), -- dealbot location | ||
| sp_address String, -- storage provider address | ||
| sp_id Nullable(UInt64), -- storage provider numeric id | ||
| sp_name Nullable(String), -- storage provider name | ||
|
|
||
| total_periods_due UInt32, -- cumulative proving periods due (confirmed by subgraph) | ||
| total_faulted_periods UInt32, -- cumulative periods where proof was not submitted | ||
| total_success_periods UInt32, -- cumulative periods where proof was submitted (= due - faulted) | ||
| estimated_overdue_periods UInt32 -- estimated periods not yet recorded on-chain but past deadline | ||
| ) ENGINE MergeTree() | ||
| PRIMARY KEY (probe_location, sp_address, timestamp) | ||
| PARTITION BY toStartOfMonth(timestamp) | ||
| TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, | ||
| ]; | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| import { type ClickHouseClient, createClient } from "@clickhouse/client"; | ||
| import { Injectable, Logger, OnApplicationShutdown, OnModuleInit } from "@nestjs/common"; | ||
| import { ConfigService } from "@nestjs/config"; | ||
| import { InjectMetric } from "@willsoto/nestjs-prometheus"; | ||
| import { Counter, Gauge, Histogram } from "prom-client"; | ||
| import type { IClickhouseConfig, IConfig } from "../config/app.config.js"; | ||
| import { buildMigrations } from "./clickhouse.schema.js"; | ||
|
|
||
| interface BufferedRow { | ||
| table: string; | ||
| row: Record<string, unknown>; | ||
| } | ||
|
|
||
| @Injectable() | ||
| export class ClickhouseService implements OnModuleInit, OnApplicationShutdown { | ||
| private readonly logger = new Logger(ClickhouseService.name); | ||
| private readonly config: IClickhouseConfig; | ||
| private client: ClickHouseClient | null = null; | ||
| private buffer: BufferedRow[] = []; | ||
| private flushTimer: NodeJS.Timeout | null = null; | ||
|
|
||
| constructor( | ||
| @InjectMetric("clickhouseFlushDurationSeconds") private readonly flushDuration: Histogram, | ||
| @InjectMetric("clickhouseFlushErrorsTotal") private readonly flushErrors: Counter, | ||
| @InjectMetric("clickhouseBufferRows") private readonly bufferRows: Gauge, | ||
| @InjectMetric("clickhouseRowsInsertedTotal") private readonly rowsInserted: Counter, | ||
| @InjectMetric("clickhouseDroppedRowsTotal") private readonly droppedRows: Counter, | ||
| private readonly configService: ConfigService<IConfig, true>, | ||
| ) { | ||
| this.config = this.configService.get("clickhouse", { infer: true }); | ||
| } | ||
|
|
||
| async onModuleInit() { | ||
| if (!this.config.url) { | ||
| this.logger.log("CLICKHOUSE_URL not set, writes to ClickHouse disabled"); | ||
| return; | ||
| } | ||
|
|
||
| this.client = createClient({ | ||
| url: this.config.url, | ||
| }); | ||
|
|
||
| const parsedUrl = new URL(this.config.url); | ||
| const database = parsedUrl.pathname.replace(/^\//, ""); | ||
| try { | ||
| await this.migrate(database); | ||
| } catch (err) { | ||
| this.logger.error({ event: "clickhouse_migration_failed", database, error: String(err) }); | ||
| throw err; | ||
| } | ||
|
|
||
| this.flushTimer = setInterval(() => { | ||
| this.flush().catch((err) => { | ||
| this.logger.error({ event: "flush_interval_error", error: String(err) }); | ||
| }); | ||
| }, this.config.flushIntervalMs); | ||
|
|
||
| this.logger.log({ | ||
| event: "clickhouse_initialized", | ||
| host: parsedUrl.host, | ||
| database, | ||
| batchSize: this.config.batchSize, | ||
| flushIntervalMs: this.config.flushIntervalMs, | ||
| probeLocation: this.configService.get("app").probeLocation, | ||
| }); | ||
|
SgtPooki marked this conversation as resolved.
|
||
| } | ||
|
|
||
| private async migrate(database: string): Promise<void> { | ||
| if (!this.client) return; | ||
| const migrations = buildMigrations(database); | ||
|
iand marked this conversation as resolved.
|
||
| for (const sql of migrations) { | ||
| await this.client.command({ query: sql }); | ||
| } | ||
| this.logger.log({ event: "clickhouse_migrated", database }); | ||
| } | ||
|
|
||
| async onApplicationShutdown() { | ||
| if (this.flushTimer) { | ||
| clearInterval(this.flushTimer); | ||
| this.flushTimer = null; | ||
| } | ||
| await this.flush(); | ||
| await this.client?.close(); | ||
| } | ||
|
|
||
| /** | ||
| * Queue a row for insertion. Returns immediately; the flush happens in the background. | ||
| * Safe to call when ClickHouse is disabled: rows are silently dropped. | ||
| */ | ||
| insert(table: string, row: Record<string, unknown>): void { | ||
| if (!this.client) return; | ||
|
|
||
| if (this.buffer.length >= this.config.maxBufferSize) { | ||
| this.buffer.shift(); | ||
| this.droppedRows.inc({ reason: "buffer_full" }); | ||
| } | ||
|
|
||
| this.buffer.push({ table, row }); | ||
| this.bufferRows.set(this.buffer.length); | ||
|
|
||
| if (this.buffer.length >= this.config.batchSize) { | ||
| this.flush().catch((err) => { | ||
| this.logger.error({ event: "flush_batch_error", error: String(err) }); | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| private async flush(): Promise<void> { | ||
|
iand marked this conversation as resolved.
|
||
| if (!this.client || this.buffer.length === 0) return; | ||
|
|
||
| const n = this.buffer.length; | ||
| const batch = this.buffer.slice(0, n); | ||
|
|
||
| // Group by table so we can do one insert call per table | ||
| const byTable = new Map<string, Record<string, unknown>[]>(); | ||
| for (const { table, row } of batch) { | ||
| let rows = byTable.get(table); | ||
| if (!rows) { | ||
| rows = []; | ||
| byTable.set(table, rows); | ||
| } | ||
| rows.push(row); | ||
| } | ||
|
SgtPooki marked this conversation as resolved.
|
||
|
|
||
| const end = this.flushDuration.startTimer(); | ||
|
SgtPooki marked this conversation as resolved.
|
||
| try { | ||
| await Promise.all( | ||
| Array.from(byTable.entries()).map(async ([table, rows]) => { | ||
| await this.client!.insert({ | ||
| table, | ||
| values: rows, | ||
| format: "JSONEachRow", | ||
| }); | ||
| this.rowsInserted.inc({ table }, rows.length); | ||
| }), | ||
| ); | ||
| this.buffer.splice(0, n); | ||
| this.bufferRows.set(this.buffer.length); | ||
| } catch (err) { | ||
| this.flushErrors.inc(); | ||
| this.logger.error({ | ||
|
iand marked this conversation as resolved.
|
||
| event: "flush_failed", | ||
| error: String(err), | ||
| pendingRows: n, | ||
| }); | ||
| } finally { | ||
| end(); | ||
| } | ||
| } | ||
|
|
||
| get probeLocation(): string { | ||
| return this.configService.get("app").probeLocation; | ||
| } | ||
|
|
||
| get enabled(): boolean { | ||
| return this.client !== null; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.