diff --git a/.gitignore b/.gitignore index 4bb2336..b0fea65 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,8 @@ node_modules/ .pnp/ .pnp.js - +*.js +*.d.ts # Bun bun.lockb .bun/ @@ -12,6 +13,7 @@ dist/ build/ out/ *.tsbuildinfo +__tests__ # TypeScript *.js.map diff --git a/.prettierignore b/.prettierignore index a82cd06..afe3bf3 100644 --- a/.prettierignore +++ b/.prettierignore @@ -6,4 +6,5 @@ coverage *.tsbuildinfo bun.lock docs -.husky/_ \ No newline at end of file +.husky/_ +*.js \ No newline at end of file diff --git a/bun.lock b/bun.lock index 0042f56..7d36ab2 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "bytebell-public", @@ -48,6 +49,20 @@ "zod": "^4.3.6", }, }, + "packages/db": { + "name": "@bb/db", + "version": "0.0.0", + "dependencies": { + "@bb/db-core": "workspace:*", + }, + }, + "packages/db-core": { + "name": "@bb/db-core", + "version": "0.0.0", + "dependencies": { + "@bb/types": "workspace:*", + }, + }, "packages/errors": { "name": "@bb/errors", "version": "0.0.0", @@ -55,16 +70,31 @@ "@bb/types": "workspace:*", }, }, + "packages/graph-core": { + "name": "@bb/graph-core", + "version": "0.0.0", + "dependencies": { + "@bb/db-core": "workspace:*", + "@bb/types": "workspace:*", + }, + }, + "packages/graph-db": { + "name": "@bb/graph-db", + "version": "0.0.0", + "dependencies": { + "@bb/graph-core": "workspace:*", + }, + }, "packages/ingest-business-context": { "name": "@bb/ingest-business-context", "version": "0.0.0", "dependencies": { "@bb/config": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/ingest-github": "workspace:*", "@bb/llm": "workspace:*", "@bb/logger": "workspace:*", - "@bb/neo4j": "workspace:*", "@bb/queue": "workspace:*", "@bb/types": "workspace:*", }, @@ -74,12 +104,17 @@ "version": "0.0.0", "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", + "@bb/db-core": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-core": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/llm": "workspace:*", "@bb/logger": "workspace:*", "@bb/mongo": "workspace:*", "@bb/neo4j": "workspace:*", "@bb/queue": "workspace:*", + "@bb/sqlite": "workspace:*", "@bb/types": "workspace:*", }, }, @@ -88,9 +123,9 @@ "version": "0.0.0", "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", "@bb/logger": "workspace:*", - "@bb/mongo": "workspace:*", "@bb/types": "workspace:*", "tiktoken": "^1.0.22", }, @@ -110,9 +145,9 @@ "version": "0.0.0", "dependencies": { "@bb/config": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/llm": "workspace:*", "@bb/logger": "workspace:*", - "@bb/neo4j": "workspace:*", "@bb/types": "workspace:*", "@modelcontextprotocol/sdk": "^1.23.0", "zod": "^4.3.6", @@ -126,6 +161,8 @@ "version": "0.0.0", "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", + "@bb/db-core": "workspace:*", "@bb/errors": "workspace:*", "@bb/types": "workspace:*", "mongodb": "^7.2.0", @@ -137,6 +174,8 @@ "dependencies": { "@bb/config": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-core": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/mongo": "workspace:*", "@bb/types": "workspace:*", "neo4j-driver": "^6.0.1", @@ -147,6 +186,7 @@ "version": "0.0.0", "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", "@bb/mongo": "workspace:*", "@bb/redis": "workspace:*", @@ -172,13 +212,16 @@ }, "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/ingest-github": "workspace:*", "@bb/mcp": "workspace:*", "@bb/mongo": "workspace:*", "@bb/neo4j": "workspace:*", "@bb/queue": "workspace:*", "@bb/redis": "workspace:*", + "@bb/sqlite": "workspace:*", "@bb/types": "workspace:*", "express": "^5.2.1", }, @@ -186,6 +229,17 @@ "@types/express": "^5.0.6", }, }, + "packages/sqlite": { + "name": "@bb/sqlite", + "version": "0.0.0", + "dependencies": { + "@bb/config": "workspace:*", + "@bb/db": "workspace:*", + "@bb/db-core": "workspace:*", + "@bb/errors": "workspace:*", + "@bb/types": "workspace:*", + }, + }, "packages/types": { "name": "@bb/types", "version": "0.0.0", @@ -202,8 +256,16 @@ "@bb/config": ["@bb/config@workspace:packages/config"], + "@bb/db": ["@bb/db@workspace:packages/db"], + + "@bb/db-core": ["@bb/db-core@workspace:packages/db-core"], + "@bb/errors": ["@bb/errors@workspace:packages/errors"], + "@bb/graph-core": ["@bb/graph-core@workspace:packages/graph-core"], + + "@bb/graph-db": ["@bb/graph-db@workspace:packages/graph-db"], + "@bb/ingest-business-context": ["@bb/ingest-business-context@workspace:packages/ingest-business-context"], "@bb/ingest-github": ["@bb/ingest-github@workspace:packages/ingest-github"], @@ -224,6 +286,8 @@ "@bb/server": ["@bb/server@workspace:packages/server"], + "@bb/sqlite": ["@bb/sqlite@workspace:packages/sqlite"], + "@bb/types": ["@bb/types@workspace:packages/types"], "@colors/colors": ["@colors/colors@1.6.0", "", {}, "sha512-Ir+AOibqzrIsL6ajt3Rz3LskB7OiMVHqltZmspbW/TJuTVuyOMirVqAkjfY6JISiLHgyNqicAC8AyHHGzNd/dA=="], diff --git a/eslint.config.mjs b/eslint.config.mjs index 93e80cd..50151c1 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -16,6 +16,8 @@ export default [ "**/*.d.ts", ".husky/_/**", "docs/**", + "**/*.js", + "**/*.d.ts", ], }, diff --git a/packages/cli/src/DeleteCommand.ts b/packages/cli/src/DeleteCommand.ts index 8c50bac..7f2d3ea 100644 --- a/packages/cli/src/DeleteCommand.ts +++ b/packages/cli/src/DeleteCommand.ts @@ -1,4 +1,6 @@ import { Command } from "commander"; +import { Config, DbProviderType } from "@bb/types"; +import { getConfigValue } from "@bb/config"; import { ensureServerRunning, ServerStartTimeoutError } from "./serverSpawn.ts"; import { deleteJson, HttpClientError } from "./httpClient.ts"; import { promptRepoSelector } from "./repoSelectorPrompt.ts"; @@ -14,7 +16,10 @@ interface DeleteResponse { export function buildDeleteCommand(): Command { const cmd = new Command("delete"); - cmd.description("Pick one or more indexed knowledge entries and delete them from Mongo + Neo4j.").action(runDelete); + const dbProvider = getConfigValue(Config.DbProvider) === DbProviderType.Sqlite ? "SQLite" : "Mongo"; + cmd + .description(`Pick one or more indexed knowledge entries and delete them from ${dbProvider} + Neo4j.`) + .action(runDelete); return cmd; } @@ -51,10 +56,11 @@ async function runDelete(): Promise { } function formatDeletePrompt(labels: string[]): string { + const dbProvider = getConfigValue(Config.DbProvider) === DbProviderType.Sqlite ? "SQLite" : "Mongo"; if (labels.length === 1) { - return `Delete ${labels[0]} from Mongo + Neo4j? [y/N]`; + return `Delete ${labels[0]} from ${dbProvider} + Neo4j? [y/N]`; } - return `Delete ${labels.length} entries from Mongo + Neo4j? [y/N]`; + return `Delete ${labels.length} entries from ${dbProvider} + Neo4j? [y/N]`; } function handleError(cause: unknown): void { diff --git a/packages/cli/src/LsInteractive.tsx b/packages/cli/src/LsInteractive.tsx index 886328d..3958864 100644 --- a/packages/cli/src/LsInteractive.tsx +++ b/packages/cli/src/LsInteractive.tsx @@ -1,11 +1,18 @@ import { useState, useMemo } from "react"; import type { ReactElement } from "react"; import { Box, Text, useApp, useInput } from "ink"; +import type { CommitHashRecord } from "@bb/types"; export interface RepoEntry { knowledgeId: string; source: - | { kind: "github"; repoUrl: string; branch?: string; commitId?: string; commitHashes?: string[] } + | { + kind: "github"; + repoUrl: string; + branch?: string; + commitId?: string; + commitHashes?: (string | CommitHashRecord)[]; + } | { kind: "local"; sourcePath: string }; state: string; createdAt: string; @@ -204,13 +211,16 @@ export function LsInteractive({ repos, onDone }: LsInteractiveProps): ReactEleme Indexed Commits ({s.commitHashes?.length ?? 0}) - {(s.commitHashes ?? []).map((h, i) => ( - - {i + 1}. - {h.slice(0, 8)} - {h === s.commitId && (current head)} - - ))} + {(s.commitHashes ?? []).map((h, i) => { + const hash = typeof h === "string" ? h : (h as { hash: string }).hash; + return ( + + {i + 1}. + {hash.slice(0, 8)} + {hash === s.commitId && (current head)} + + ); + })} {(!s.commitHashes || s.commitHashes.length === 0) && ( No commit history recorded. diff --git a/packages/cli/src/repoSelectorPrompt.ts b/packages/cli/src/repoSelectorPrompt.ts index e08e256..dbe9a26 100644 --- a/packages/cli/src/repoSelectorPrompt.ts +++ b/packages/cli/src/repoSelectorPrompt.ts @@ -1,5 +1,6 @@ import React from "react"; import { render } from "ink"; +import type { CommitHashRecord } from "@bb/types"; import { getJson } from "./httpClient.ts"; import { RepoSelector, @@ -27,7 +28,13 @@ import { export interface RepoListEntry { knowledgeId: string; source: - | { kind: "github"; repoUrl: string; branch?: string; commitId?: string; commitHashes?: string[] } + | { + kind: "github"; + repoUrl: string; + branch?: string; + commitId?: string; + commitHashes?: (string | CommitHashRecord)[]; + } | { kind: "local"; sourcePath: string }; state: string; createdAt: string; diff --git a/packages/config/src/schema-fields.ts b/packages/config/src/schema-fields.ts new file mode 100644 index 0000000..ae6cc54 --- /dev/null +++ b/packages/config/src/schema-fields.ts @@ -0,0 +1,174 @@ +import type { BytebellConfig } from "./schema.ts"; +import { Config } from "@bb/types"; +import type { ConfigValue } from "./schema.ts"; +import type { LogLevel, LlmProvider } from "./schema.ts"; + +export function readField(cfg: BytebellConfig, key: K): ConfigValue { + switch (key) { + case Config.ServerPort: + return cfg.server_port as ConfigValue; + case Config.MongoUri: + return cfg.mongo_uri as ConfigValue; + case Config.Neo4jUri: + return cfg.neo4j_uri as ConfigValue; + case Config.Neo4jUser: + return cfg.neo4j_user as ConfigValue; + case Config.Neo4jPassword: + return cfg.neo4j_password as ConfigValue; + case Config.RedisUrl: + return cfg.redis_url as ConfigValue; + case Config.OpenrouterApiKey: + return cfg.openrouter_api_key as ConfigValue; + case Config.OpenrouterModel: + return cfg.openrouter_model as ConfigValue; + case Config.OpenrouterFallbackModel1: + return cfg.openrouter_fallback_model_1 as ConfigValue; + case Config.OpenrouterFallbackModel2: + return cfg.openrouter_fallback_model_2 as ConfigValue; + case Config.OpenrouterFallbackModel3: + return cfg.openrouter_fallback_model_3 as ConfigValue; + case Config.OpenrouterFallbackModel4: + return cfg.openrouter_fallback_model_4 as ConfigValue; + case Config.ConcurrencyGithub: + return cfg.concurrency.github as ConfigValue; + case Config.LogLevel: + return cfg.log_level as ConfigValue; + case Config.LogRetentionDays: + return cfg.log_retention_days as ConfigValue; + case Config.LlmCacheEnabled: + return cfg.llm_cache_enabled as ConfigValue; + case Config.LlmProvider: + return cfg.llm_provider as ConfigValue; + case Config.OllamaUrl: + return cfg.ollama_url as ConfigValue; + case Config.OllamaModel: + return cfg.ollama_model as ConfigValue; + case Config.ContextWindowLimit: + return cfg["context.window.limit"] as ConfigValue; + case Config.MaxTokensPerChunk: + return cfg["max.tokens.per.chunk"] as ConfigValue; + case Config.BigFileConcurrency: + return cfg["big.file.concurrency"] as ConfigValue; + case Config.AbsoluteFileSizeCap: + return cfg["absolute.file.size.cap"] as ConfigValue; + case Config.ConcurrentWorkers: + return cfg["concurrent.workers"] as ConfigValue; + case Config.LlmConcurrency: + return cfg["llm.concurrency"] as ConfigValue; + case Config.FolderSummaryBatchSize: + return cfg["folder.summary.batch.size"] as ConfigValue; + case Config.FolderSummaryBatchMaxFiles: + return cfg["folder.summary.batch.max.files"] as ConfigValue; + case Config.Neo4jBatchSize: + return cfg["neo4j.batch.size"] as ConfigValue; + case Config.CondenseContextLimit: + return cfg["condense.context.limit"] as ConfigValue; + case Config.CondensePromptOverhead: + return cfg["condense.prompt.overhead"] as ConfigValue; + case Config.SmallFileDedupThreshold: + return cfg["small.file.dedup.threshold"] as ConfigValue; + case Config.BigFileLineThreshold: + return cfg["big.file.line.threshold"] as ConfigValue; + case Config.OrgId: + return cfg.org_id as ConfigValue; + case Config.SkipDecisionEnabled: + return cfg["skip.decision.enabled"] as ConfigValue; + case Config.SkipDecisionMaxCharsForLlm: + return cfg["skip.decision.max.chars.for.llm"] as ConfigValue; + case Config.SkipDecisionCachePath: + return cfg["skip.decision.cache.path"] as ConfigValue; + case Config.DbProvider: + return cfg.db_provider as ConfigValue; + case Config.GraphProvider: + return cfg.graph_provider as ConfigValue; + case Config.SqlitePath: + return cfg.sqlite_path as ConfigValue; + default: + throw new Error(`Unknown config key: ${key}`); + } +} + +export function writeField(cfg: BytebellConfig, key: K, value: ConfigValue): BytebellConfig { + switch (key) { + case Config.ServerPort: + return { ...cfg, server_port: value as number }; + case Config.MongoUri: + return { ...cfg, mongo_uri: value as string }; + case Config.Neo4jUri: + return { ...cfg, neo4j_uri: value as string }; + case Config.Neo4jUser: + return { ...cfg, neo4j_user: value as string }; + case Config.Neo4jPassword: + return { ...cfg, neo4j_password: value as string }; + case Config.RedisUrl: + return { ...cfg, redis_url: value as string }; + case Config.OpenrouterApiKey: + return { ...cfg, openrouter_api_key: value as string }; + case Config.OpenrouterModel: + return { ...cfg, openrouter_model: value as string }; + case Config.OpenrouterFallbackModel1: + return { ...cfg, openrouter_fallback_model_1: value as string }; + case Config.OpenrouterFallbackModel2: + return { ...cfg, openrouter_fallback_model_2: value as string }; + case Config.OpenrouterFallbackModel3: + return { ...cfg, openrouter_fallback_model_3: value as string }; + case Config.OpenrouterFallbackModel4: + return { ...cfg, openrouter_fallback_model_4: value as string }; + case Config.ConcurrencyGithub: + return { ...cfg, concurrency: { ...cfg.concurrency, github: value as number } }; + case Config.LogLevel: + return { ...cfg, log_level: value as LogLevel }; + case Config.LogRetentionDays: + return { ...cfg, log_retention_days: value as number }; + case Config.LlmCacheEnabled: + return { ...cfg, llm_cache_enabled: value as boolean }; + case Config.LlmProvider: + return { ...cfg, llm_provider: value as LlmProvider }; + case Config.OllamaUrl: + return { ...cfg, ollama_url: value as string }; + case Config.OllamaModel: + return { ...cfg, ollama_model: value as string }; + case Config.ContextWindowLimit: + return { ...cfg, "context.window.limit": value as number }; + case Config.MaxTokensPerChunk: + return { ...cfg, "max.tokens.per.chunk": value as number }; + case Config.BigFileConcurrency: + return { ...cfg, "big.file.concurrency": value as number }; + case Config.AbsoluteFileSizeCap: + return { ...cfg, "absolute.file.size.cap": value as number }; + case Config.ConcurrentWorkers: + return { ...cfg, "concurrent.workers": value as number }; + case Config.LlmConcurrency: + return { ...cfg, "llm.concurrency": value as number }; + case Config.FolderSummaryBatchSize: + return { ...cfg, "folder.summary.batch.size": value as number }; + case Config.FolderSummaryBatchMaxFiles: + return { ...cfg, "folder.summary.batch.max.files": value as number }; + case Config.Neo4jBatchSize: + return { ...cfg, "neo4j.batch.size": value as number }; + case Config.CondenseContextLimit: + return { ...cfg, "condense.context.limit": value as number }; + case Config.CondensePromptOverhead: + return { ...cfg, "condense.prompt.overhead": value as number }; + case Config.SmallFileDedupThreshold: + return { ...cfg, "small.file.dedup.threshold": value as number }; + case Config.BigFileLineThreshold: + return { ...cfg, "big.file.line.threshold": value as number }; + case Config.OrgId: + throw new Error("org_id is fixed to 'local' in OSS builds and cannot be set"); + case Config.SkipDecisionEnabled: + return { ...cfg, "skip.decision.enabled": value as boolean }; + case Config.SkipDecisionMaxCharsForLlm: + return { ...cfg, "skip.decision.max.chars.for.llm": value as number }; + case Config.SkipDecisionCachePath: + return { ...cfg, "skip.decision.cache.path": value as string }; + case Config.DbProvider: + return { ...cfg, db_provider: value as string }; + case Config.GraphProvider: + return { ...cfg, graph_provider: value as string }; + case Config.SqlitePath: + return { ...cfg, sqlite_path: value as string }; + default: + throw new Error(`Unknown config key: ${key}`); + } +} diff --git a/packages/config/src/schema.ts b/packages/config/src/schema.ts index d5bae9d..bb94dcd 100644 --- a/packages/config/src/schema.ts +++ b/packages/config/src/schema.ts @@ -53,6 +53,9 @@ export const configSchema = z "skip.decision.enabled": z.boolean().default(true), "skip.decision.max.chars.for.llm": z.number().int().positive().default(4000), "skip.decision.cache.path": z.string().default(""), + db_provider: z.string().default("mongo"), + graph_provider: z.string().default("neo4j"), + sqlite_path: z.string().default(""), }) .strict(); @@ -97,6 +100,9 @@ export type ConfigValueMap = { [Config.SkipDecisionEnabled]: boolean; [Config.SkipDecisionMaxCharsForLlm]: number; [Config.SkipDecisionCachePath]: string; + [Config.DbProvider]: string; + [Config.GraphProvider]: string; + [Config.SqlitePath]: string; }; export type ConfigValue = ConfigValueMap[K]; @@ -155,158 +161,9 @@ export const HINTS: Readonly> = { [Config.SkipDecisionEnabled]: "bytebell set skip.decision.enabled ", [Config.SkipDecisionMaxCharsForLlm]: "bytebell set skip.decision.max.chars.for.llm ", [Config.SkipDecisionCachePath]: "bytebell set skip.decision.cache.path ", + [Config.DbProvider]: "bytebell set db-provider ", + [Config.GraphProvider]: "bytebell set graph-provider ", + [Config.SqlitePath]: "bytebell set sqlite-path ", }; -export function readField(cfg: BytebellConfig, key: K): ConfigValue { - switch (key) { - case Config.ServerPort: - return cfg.server_port as ConfigValue; - case Config.MongoUri: - return cfg.mongo_uri as ConfigValue; - case Config.Neo4jUri: - return cfg.neo4j_uri as ConfigValue; - case Config.Neo4jUser: - return cfg.neo4j_user as ConfigValue; - case Config.Neo4jPassword: - return cfg.neo4j_password as ConfigValue; - case Config.RedisUrl: - return cfg.redis_url as ConfigValue; - case Config.OpenrouterApiKey: - return cfg.openrouter_api_key as ConfigValue; - case Config.OpenrouterModel: - return cfg.openrouter_model as ConfigValue; - case Config.OpenrouterFallbackModel1: - return cfg.openrouter_fallback_model_1 as ConfigValue; - case Config.OpenrouterFallbackModel2: - return cfg.openrouter_fallback_model_2 as ConfigValue; - case Config.OpenrouterFallbackModel3: - return cfg.openrouter_fallback_model_3 as ConfigValue; - case Config.OpenrouterFallbackModel4: - return cfg.openrouter_fallback_model_4 as ConfigValue; - case Config.ConcurrencyGithub: - return cfg.concurrency.github as ConfigValue; - case Config.LogLevel: - return cfg.log_level as ConfigValue; - case Config.LogRetentionDays: - return cfg.log_retention_days as ConfigValue; - case Config.LlmCacheEnabled: - return cfg.llm_cache_enabled as ConfigValue; - case Config.LlmProvider: - return cfg.llm_provider as ConfigValue; - case Config.OllamaUrl: - return cfg.ollama_url as ConfigValue; - case Config.OllamaModel: - return cfg.ollama_model as ConfigValue; - case Config.ContextWindowLimit: - return cfg["context.window.limit"] as ConfigValue; - case Config.MaxTokensPerChunk: - return cfg["max.tokens.per.chunk"] as ConfigValue; - case Config.BigFileConcurrency: - return cfg["big.file.concurrency"] as ConfigValue; - case Config.AbsoluteFileSizeCap: - return cfg["absolute.file.size.cap"] as ConfigValue; - case Config.ConcurrentWorkers: - return cfg["concurrent.workers"] as ConfigValue; - case Config.LlmConcurrency: - return cfg["llm.concurrency"] as ConfigValue; - case Config.FolderSummaryBatchSize: - return cfg["folder.summary.batch.size"] as ConfigValue; - case Config.FolderSummaryBatchMaxFiles: - return cfg["folder.summary.batch.max.files"] as ConfigValue; - case Config.Neo4jBatchSize: - return cfg["neo4j.batch.size"] as ConfigValue; - case Config.CondenseContextLimit: - return cfg["condense.context.limit"] as ConfigValue; - case Config.CondensePromptOverhead: - return cfg["condense.prompt.overhead"] as ConfigValue; - case Config.SmallFileDedupThreshold: - return cfg["small.file.dedup.threshold"] as ConfigValue; - case Config.BigFileLineThreshold: - return cfg["big.file.line.threshold"] as ConfigValue; - case Config.OrgId: - return cfg.org_id as ConfigValue; - case Config.SkipDecisionEnabled: - return cfg["skip.decision.enabled"] as ConfigValue; - case Config.SkipDecisionMaxCharsForLlm: - return cfg["skip.decision.max.chars.for.llm"] as ConfigValue; - case Config.SkipDecisionCachePath: - return cfg["skip.decision.cache.path"] as ConfigValue; - } -} - -export function writeField(cfg: BytebellConfig, key: K, value: ConfigValue): BytebellConfig { - switch (key) { - case Config.ServerPort: - return { ...cfg, server_port: value as number }; - case Config.MongoUri: - return { ...cfg, mongo_uri: value as string }; - case Config.Neo4jUri: - return { ...cfg, neo4j_uri: value as string }; - case Config.Neo4jUser: - return { ...cfg, neo4j_user: value as string }; - case Config.Neo4jPassword: - return { ...cfg, neo4j_password: value as string }; - case Config.RedisUrl: - return { ...cfg, redis_url: value as string }; - case Config.OpenrouterApiKey: - return { ...cfg, openrouter_api_key: value as string }; - case Config.OpenrouterModel: - return { ...cfg, openrouter_model: value as string }; - case Config.OpenrouterFallbackModel1: - return { ...cfg, openrouter_fallback_model_1: value as string }; - case Config.OpenrouterFallbackModel2: - return { ...cfg, openrouter_fallback_model_2: value as string }; - case Config.OpenrouterFallbackModel3: - return { ...cfg, openrouter_fallback_model_3: value as string }; - case Config.OpenrouterFallbackModel4: - return { ...cfg, openrouter_fallback_model_4: value as string }; - case Config.ConcurrencyGithub: - return { ...cfg, concurrency: { ...cfg.concurrency, github: value as number } }; - case Config.LogLevel: - return { ...cfg, log_level: value as LogLevel }; - case Config.LogRetentionDays: - return { ...cfg, log_retention_days: value as number }; - case Config.LlmCacheEnabled: - return { ...cfg, llm_cache_enabled: value as boolean }; - case Config.LlmProvider: - return { ...cfg, llm_provider: value as LlmProvider }; - case Config.OllamaUrl: - return { ...cfg, ollama_url: value as string }; - case Config.OllamaModel: - return { ...cfg, ollama_model: value as string }; - case Config.ContextWindowLimit: - return { ...cfg, "context.window.limit": value as number }; - case Config.MaxTokensPerChunk: - return { ...cfg, "max.tokens.per.chunk": value as number }; - case Config.BigFileConcurrency: - return { ...cfg, "big.file.concurrency": value as number }; - case Config.AbsoluteFileSizeCap: - return { ...cfg, "absolute.file.size.cap": value as number }; - case Config.ConcurrentWorkers: - return { ...cfg, "concurrent.workers": value as number }; - case Config.LlmConcurrency: - return { ...cfg, "llm.concurrency": value as number }; - case Config.FolderSummaryBatchSize: - return { ...cfg, "folder.summary.batch.size": value as number }; - case Config.FolderSummaryBatchMaxFiles: - return { ...cfg, "folder.summary.batch.max.files": value as number }; - case Config.Neo4jBatchSize: - return { ...cfg, "neo4j.batch.size": value as number }; - case Config.CondenseContextLimit: - return { ...cfg, "condense.context.limit": value as number }; - case Config.CondensePromptOverhead: - return { ...cfg, "condense.prompt.overhead": value as number }; - case Config.SmallFileDedupThreshold: - return { ...cfg, "small.file.dedup.threshold": value as number }; - case Config.BigFileLineThreshold: - return { ...cfg, "big.file.line.threshold": value as number }; - case Config.OrgId: - throw new Error("org_id is fixed to 'local' in OSS builds and cannot be set"); - case Config.SkipDecisionEnabled: - return { ...cfg, "skip.decision.enabled": value as boolean }; - case Config.SkipDecisionMaxCharsForLlm: - return { ...cfg, "skip.decision.max.chars.for.llm": value as number }; - case Config.SkipDecisionCachePath: - return { ...cfg, "skip.decision.cache.path": value as string }; - } -} +export { readField, writeField } from "./schema-fields.ts"; diff --git a/packages/db-core/README.md b/packages/db-core/README.md new file mode 100644 index 0000000..3b1673d --- /dev/null +++ b/packages/db-core/README.md @@ -0,0 +1,26 @@ +# `@bb/db-core` + +Provider-agnostic interfaces for the document database layer. + +## Responsibilities + +Defines the contract that every document database backend (Mongo, SQLite, etc.) must implement. Contains no I/O — pure TypeScript interfaces and shared types. + +## Public Interfaces + +- `IDocumentDatabaseProvider` — composite of all repository interfaces plus `connect`/`close`/`ping` +- `IKnowledgeRepository` — CRUD for knowledge entries +- `IRawRepository` — upsert, list SHA map, delete raw file docs +- `IAggregateStatsRepository` — `aggregateStats()` +- `IActivityRepository` — `recordActivity()` +- `IUsageRepository` — `incrementUsage`, `getMonthlyUsage`, `getGlobalUsage` +- `DbPingResult` — health probe result shape +- `FileAnalysis`, `RawFileDoc` — shared raw-file types (previously duplicated in `@bb/mongo`) + +## Data Ownership + +None. This package owns no data — it only describes shapes. + +## Tier + +Strategy (interfaces consumed by `@bb/db` and implemented by `@bb/mongo`, `@bb/sqlite`) diff --git a/packages/db-core/package.json b/packages/db-core/package.json new file mode 100644 index 0000000..b083c7f --- /dev/null +++ b/packages/db-core/package.json @@ -0,0 +1,17 @@ +{ + "name": "@bb/db-core", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "#src/*": "./src/*" + }, + "dependencies": { + "@bb/types": "workspace:*" + } +} diff --git a/packages/db-core/src/README.md b/packages/db-core/src/README.md new file mode 100644 index 0000000..8273b6b --- /dev/null +++ b/packages/db-core/src/README.md @@ -0,0 +1,7 @@ +# `@bb/db-core/src` + +Provider-agnostic interfaces and shared types for the document database layer. + +## Files + +- **index.ts** — all public interfaces (`IDocumentDatabaseProvider`, `IKnowledgeRepository`, `IRawRepository`, etc.) and shared types (`FileAnalysis`, `RawFileDoc`, `DbPingResult`) diff --git a/packages/db-core/src/index.ts b/packages/db-core/src/index.ts new file mode 100644 index 0000000..2bd2964 --- /dev/null +++ b/packages/db-core/src/index.ts @@ -0,0 +1,70 @@ +import { KnowledgeState } from "@bb/types"; +import type { + KnowledgeDoc, + KnowledgeFailureCategory, + StatsResponse, + ActivityInput, + FileAnalysisSection, + FileAnalysis, + RawFileDoc, + KnowledgeListEntry, + DeleteKnowledgeResult, + DbPingResult, +} from "@bb/types"; + +export type { FileAnalysisSection, FileAnalysis, RawFileDoc, KnowledgeListEntry, DeleteKnowledgeResult, DbPingResult }; + +export interface IKnowledgeRepository { + setKnowledgeState(knowledgeId: string, state: KnowledgeState): Promise; + setKnowledgeCommit( + knowledgeId: string, + commitHash: string, + inputTokens?: string, + outputTokens?: string, + costUsd?: string, + ): Promise; + setKnowledgeBranch(knowledgeId: string, branch: string): Promise; + updateKnowledgeProgress(knowledgeId: string, processedFiles: number, totalFiles?: number): Promise; + upsertKnowledge(doc: Omit & { updatedAt?: Date }): Promise; + deleteKnowledge(knowledgeId: string): Promise; + listKnowledge(opts?: { limit?: number }): Promise; + getKnowledge(knowledgeId: string): Promise; + markKnowledgeFailed( + knowledgeId: string, + reason: string, + category: KnowledgeFailureCategory, + detail?: string, + ): Promise; +} + +export interface IRawRepository { + upsertRawFile(doc: Omit): Promise; + listRawFileShas(knowledgeId: string): Promise>; + deleteRawFiles(knowledgeId: string, relativePaths: string[]): Promise; +} + +export interface IAggregateStatsRepository { + aggregateStats(): Promise; +} + +export interface IActivityRepository { + recordActivity(activity: ActivityInput): Promise; +} + +export interface IUsageRepository { + incrementUsage(identityId: string, inputTokenCount?: number, outputTokenCount?: number): Promise; + getMonthlyUsage(year: number, month: number): Promise; + getGlobalUsage(): Promise; +} + +export interface IDocumentDatabaseProvider { + knowledge: IKnowledgeRepository; + raw: IRawRepository; + stats: IAggregateStatsRepository; + activity: IActivityRepository; + usage: IUsageRepository; + + connect(): Promise; + close(): Promise; + ping(): Promise; +} diff --git a/packages/db-core/tsconfig.json b/packages/db-core/tsconfig.json new file mode 100644 index 0000000..4ed0786 --- /dev/null +++ b/packages/db-core/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"] +} diff --git a/packages/db/README.md b/packages/db/README.md new file mode 100644 index 0000000..c08ae38 --- /dev/null +++ b/packages/db/README.md @@ -0,0 +1,24 @@ +# `@bb/db` + +Provider registry and facade for the document database layer. + +## Responsibilities + +Maintains a map of named database providers, exposes a single `getDb()` accessor that delegates to whichever provider is active. Provides convenience facade objects (`knowledge`, `raw`, `stats`, `activity`, `usage`) that proxy to the active provider's methods. + +## Public Interfaces + +- `registerDbProvider(name, factory)` — register a provider (called at import time by `@bb/mongo` and `@bb/sqlite`) +- `connectDb(providerName)` — instantiate and connect a provider +- `closeDb()` — close the active provider +- `getDb()` — returns the active `IDocumentDatabaseProvider` +- `knowledge`, `raw`, `stats`, `activity`, `usage` — facade objects proxying to `getDb()` +- `pingDb()` — health probe + +## Data Ownership + +None. All I/O is delegated to the active provider. + +## Tier + +Strategy (consumer of `@bb/db-core`, consumed by domain and queue packages) diff --git a/packages/db/package.json b/packages/db/package.json new file mode 100644 index 0000000..bcc7e8f --- /dev/null +++ b/packages/db/package.json @@ -0,0 +1,17 @@ +{ + "name": "@bb/db", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "#src/*": "./src/*" + }, + "dependencies": { + "@bb/db-core": "workspace:*" + } +} diff --git a/packages/db/src/README.md b/packages/db/src/README.md new file mode 100644 index 0000000..a14ca95 --- /dev/null +++ b/packages/db/src/README.md @@ -0,0 +1,7 @@ +# `@bb/db/src` + +Provider registry, facade, and lifecycle management for the document database layer. + +## Files + +- **index.ts** — provider registry (`registerDbProvider`, `connectDb`, `closeDb`, `getDb`), convenience facade objects (`knowledge`, `raw`, `stats`, `activity`, `usage`), and `pingDb()` diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts new file mode 100644 index 0000000..1d8fe32 --- /dev/null +++ b/packages/db/src/index.ts @@ -0,0 +1,75 @@ +import type { + IDocumentDatabaseProvider, + IKnowledgeRepository, + IRawRepository, + IAggregateStatsRepository, + IActivityRepository, + IUsageRepository, + DbPingResult, +} from "@bb/db-core"; + +let activeProvider: IDocumentDatabaseProvider | null = null; +const providers = new Map IDocumentDatabaseProvider>(); + +export function registerDbProvider(name: string, factory: () => IDocumentDatabaseProvider) { + providers.set(name, factory); +} + +export function getDb(): IDocumentDatabaseProvider { + if (!activeProvider) { + throw new Error("Database provider not initialized. Call connectDb() first."); + } + return activeProvider; +} + +export async function connectDb(providerName: string): Promise { + const factory = providers.get(providerName); + if (!factory) { + throw new Error(`Database provider '${providerName}' not registered.`); + } + activeProvider = factory(); + await activeProvider.connect(); +} + +export async function closeDb(): Promise { + if (activeProvider) { + await activeProvider.close(); + activeProvider = null; + } +} + +export const knowledgeDb: IKnowledgeRepository = { + setKnowledgeState: (...args) => getDb().knowledge.setKnowledgeState(...args), + setKnowledgeCommit: (...args) => getDb().knowledge.setKnowledgeCommit(...args), + setKnowledgeBranch: (...args) => getDb().knowledge.setKnowledgeBranch(...args), + updateKnowledgeProgress: (...args) => getDb().knowledge.updateKnowledgeProgress(...args), + upsertKnowledge: (...args) => getDb().knowledge.upsertKnowledge(...args), + deleteKnowledge: (...args) => getDb().knowledge.deleteKnowledge(...args), + listKnowledge: (...args) => getDb().knowledge.listKnowledge(...args), + getKnowledge: (...args) => getDb().knowledge.getKnowledge(...args), + markKnowledgeFailed: (...args) => getDb().knowledge.markKnowledgeFailed(...args), +}; + +export const rawDb: IRawRepository = { + upsertRawFile: (...args) => getDb().raw.upsertRawFile(...args), + listRawFileShas: (...args) => getDb().raw.listRawFileShas(...args), + deleteRawFiles: (...args) => getDb().raw.deleteRawFiles(...args), +}; + +export const statsDb: IAggregateStatsRepository = { + aggregateStats: (...args) => getDb().stats.aggregateStats(...args), +}; + +export const activityDb: IActivityRepository = { + recordActivity: (...args) => getDb().activity.recordActivity(...args), +}; + +export const usageDb: IUsageRepository = { + incrementUsage: (...args) => getDb().usage.incrementUsage(...args), + getMonthlyUsage: (...args) => getDb().usage.getMonthlyUsage(...args), + getGlobalUsage: (...args) => getDb().usage.getGlobalUsage(...args), +}; + +export async function pingDb(): Promise { + return getDb().ping(); +} diff --git a/packages/db/tsconfig.json b/packages/db/tsconfig.json new file mode 100644 index 0000000..4ed0786 --- /dev/null +++ b/packages/db/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"] +} diff --git a/packages/graph-core/README.md b/packages/graph-core/README.md new file mode 100644 index 0000000..e1a10be --- /dev/null +++ b/packages/graph-core/README.md @@ -0,0 +1,26 @@ +# `@bb/graph-core` + +Provider-agnostic interfaces for the graph database layer. + +## Responsibilities + +Defines the contract that every graph database backend (Neo4j, etc.) must implement. Contains no I/O — pure TypeScript interfaces and shared input/output types. + +## Public Interfaces + +- `IGraphDatabaseProvider` — composite of all repository interfaces plus `connect`/`close`/`ping`/`runCypher` +- `IGraphKnowledgeRepository` — knowledge node CRUD in the graph +- `IGraphFileRepository` — file node upsert, delete, version snapshot +- `IGraphFolderRepository` — folder node upsert +- `IGraphRepoRepository` — repo node upsert +- `IGraphIndexRepository` — index creation +- `GraphPingResult` — health probe result shape +- Input types: `NodeScope`, `UpsertFileNodeInput`, `UpsertFolderNodeInput`, `UpsertRepoNodeInput`, `SnapshotFilesInput`, and summary payload types + +## Data Ownership + +None. This package owns no data — it only describes shapes. + +## Tier + +Strategy (interfaces consumed by `@bb/graph-db` and implemented by `@bb/neo4j`) diff --git a/packages/graph-core/package.json b/packages/graph-core/package.json new file mode 100644 index 0000000..9c3b726 --- /dev/null +++ b/packages/graph-core/package.json @@ -0,0 +1,18 @@ +{ + "name": "@bb/graph-core", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "#src/*": "./src/*" + }, + "dependencies": { + "@bb/db-core": "workspace:*", + "@bb/types": "workspace:*" + } +} diff --git a/packages/graph-core/src/README.md b/packages/graph-core/src/README.md new file mode 100644 index 0000000..3bd09a8 --- /dev/null +++ b/packages/graph-core/src/README.md @@ -0,0 +1,7 @@ +# `@bb/graph-core/src` + +Provider-agnostic interfaces and shared types for the graph database layer. + +## Files + +- **index.ts** — all public interfaces (`IGraphDatabaseProvider`, `IGraphKnowledgeRepository`, `IGraphFileRepository`, etc.), input/output types (`NodeScope`, `UpsertFileNodeInput`, `UpsertFolderNodeInput`, etc.), and `GraphPingResult` diff --git a/packages/graph-core/src/index.ts b/packages/graph-core/src/index.ts new file mode 100644 index 0000000..d4f3b94 --- /dev/null +++ b/packages/graph-core/src/index.ts @@ -0,0 +1,65 @@ +import { KnowledgeState } from "@bb/types"; +import type { + KnowledgeDoc, + NodeScope, + RepoSummaryPayload, + UpsertRepoNodeInput, + FolderSummaryPayload, + UpsertFolderNodeInput, + SnapshotFilesInput, + UpsertFileNodeInput, + GraphPingResult, +} from "@bb/types"; + +export type { + NodeScope, + RepoSummaryPayload, + UpsertRepoNodeInput, + FolderSummaryPayload, + UpsertFolderNodeInput, + SnapshotFilesInput, + UpsertFileNodeInput, + GraphPingResult, +}; + +export interface IGraphKnowledgeRepository { + upsertKnowledgeNode(doc: KnowledgeDoc): Promise; + setKnowledgeStateInGraph(knowledgeId: string, state: KnowledgeState): Promise; + setKnowledgeBranchInGraph(knowledgeId: string, branch: string): Promise; + deleteKnowledgeGraph(knowledgeId: string): Promise; +} + +export interface IGraphFileRepository { + upsertFileNode(input: UpsertFileNodeInput): Promise; + deleteFileNodes(knowledgeId: string, paths: string[]): Promise; + snapshotFilesToVersion(input: SnapshotFilesInput): Promise; + upsertFileNodesBatch(inputs: readonly UpsertFileNodeInput[]): Promise; +} + +export interface IGraphFolderRepository { + upsertFolderNode(input: UpsertFolderNodeInput): Promise; + upsertFolderNodesBatch(inputs: readonly UpsertFolderNodeInput[]): Promise; +} + +export interface IGraphRepoRepository { + upsertRepoNode(input: UpsertRepoNodeInput): Promise; +} + +export interface IGraphIndexRepository { + ensureKnowledgeIndexes(): Promise; + ensureFlatFolderIndexes(): Promise; +} + +export interface IGraphDatabaseProvider { + knowledge: IGraphKnowledgeRepository; + files: IGraphFileRepository; + folders: IGraphFolderRepository; + repo: IGraphRepoRepository; + indexes: IGraphIndexRepository; + + connect(): Promise; + close(): Promise; + ping(): Promise; + runCypher(query: string, params?: Record): Promise; + toNeo4jInt?(value: number): unknown; +} diff --git a/packages/graph-core/tsconfig.json b/packages/graph-core/tsconfig.json new file mode 100644 index 0000000..4ed0786 --- /dev/null +++ b/packages/graph-core/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"] +} diff --git a/packages/graph-db/README.md b/packages/graph-db/README.md new file mode 100644 index 0000000..c5da663 --- /dev/null +++ b/packages/graph-db/README.md @@ -0,0 +1,24 @@ +# `@bb/graph-db` + +Provider registry and facade for the graph database layer. + +## Responsibilities + +Maintains a map of named graph providers, exposes a single `getGraph()` accessor that delegates to the active provider. Provides convenience facade objects (`knowledge`, `files`, `folders`, `repo`, `indexes`) that proxy to the active provider's methods. + +## Public Interfaces + +- `registerGraphProvider(name, factory)` — register a provider (called at import time by `@bb/neo4j`) +- `connectGraph(providerName)` — instantiate and connect a provider +- `closeGraph()` — close the active provider +- `getGraph()` — returns the active `IGraphDatabaseProvider` +- `knowledge`, `files`, `folders`, `repo`, `indexes` — facade objects proxying to `getGraph()` +- `pingGraph()`, `runCypher()`, `toNeo4jInt()` — utility accessors + +## Data Ownership + +None. All I/O is delegated to the active provider. + +## Tier + +Strategy (consumer of `@bb/graph-core`, consumed by domain packages) diff --git a/packages/graph-db/package.json b/packages/graph-db/package.json new file mode 100644 index 0000000..4546e42 --- /dev/null +++ b/packages/graph-db/package.json @@ -0,0 +1,17 @@ +{ + "name": "@bb/graph-db", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "#src/*": "./src/*" + }, + "dependencies": { + "@bb/graph-core": "workspace:*" + } +} diff --git a/packages/graph-db/src/README.md b/packages/graph-db/src/README.md new file mode 100644 index 0000000..148aff9 --- /dev/null +++ b/packages/graph-db/src/README.md @@ -0,0 +1,7 @@ +# `@bb/graph-db/src` + +Provider registry, facade, and lifecycle management for the graph database layer. + +## Files + +- **index.ts** — provider registry (`registerGraphProvider`, `connectGraph`, `closeGraph`, `getGraph`), convenience facade objects (`knowledge`, `files`, `folders`, `repo`, `indexes`), and `pingGraph()`, `runCypher()`, `toNeo4jInt()` diff --git a/packages/graph-db/src/index.ts b/packages/graph-db/src/index.ts new file mode 100644 index 0000000..bbbae50 --- /dev/null +++ b/packages/graph-db/src/index.ts @@ -0,0 +1,83 @@ +import type { + IGraphDatabaseProvider, + IGraphKnowledgeRepository, + IGraphFileRepository, + IGraphFolderRepository, + IGraphRepoRepository, + IGraphIndexRepository, + GraphPingResult, +} from "@bb/graph-core"; + +let activeProvider: IGraphDatabaseProvider | null = null; +const providers = new Map IGraphDatabaseProvider>(); + +export function registerGraphProvider(name: string, factory: () => IGraphDatabaseProvider) { + providers.set(name, factory); +} + +export function getGraph(): IGraphDatabaseProvider { + if (!activeProvider) { + throw new Error("Graph database provider not initialized. Call connectGraph() first."); + } + return activeProvider; +} + +export async function connectGraph(providerName: string): Promise { + const factory = providers.get(providerName); + if (!factory) { + throw new Error(`Graph database provider '${providerName}' not registered.`); + } + activeProvider = factory(); + await activeProvider.connect(); +} + +export async function closeGraph(): Promise { + if (activeProvider) { + await activeProvider.close(); + activeProvider = null; + } +} + +export const knowledgeGraph: IGraphKnowledgeRepository = { + upsertKnowledgeNode: (...args) => getGraph().knowledge.upsertKnowledgeNode(...args), + setKnowledgeStateInGraph: (...args) => getGraph().knowledge.setKnowledgeStateInGraph(...args), + setKnowledgeBranchInGraph: (...args) => getGraph().knowledge.setKnowledgeBranchInGraph(...args), + deleteKnowledgeGraph: (...args) => getGraph().knowledge.deleteKnowledgeGraph(...args), +}; + +export const filesGraph: IGraphFileRepository = { + upsertFileNode: (...args) => getGraph().files.upsertFileNode(...args), + deleteFileNodes: (...args) => getGraph().files.deleteFileNodes(...args), + snapshotFilesToVersion: (...args) => getGraph().files.snapshotFilesToVersion(...args), + upsertFileNodesBatch: (...args) => getGraph().files.upsertFileNodesBatch(...args), +}; + +export const foldersGraph: IGraphFolderRepository = { + upsertFolderNode: (...args) => getGraph().folders.upsertFolderNode(...args), + upsertFolderNodesBatch: (...args) => getGraph().folders.upsertFolderNodesBatch(...args), +}; + +export const repoGraph: IGraphRepoRepository = { + upsertRepoNode: (...args) => getGraph().repo.upsertRepoNode(...args), +}; + +export const indexesGraph: IGraphIndexRepository = { + ensureKnowledgeIndexes: (...args) => getGraph().indexes.ensureKnowledgeIndexes(...args), + ensureFlatFolderIndexes: (...args) => getGraph().indexes.ensureFlatFolderIndexes(...args), +}; + +export async function pingGraph(): Promise { + return getGraph().ping(); +} + +export async function runCypher(query: string, params?: Record): Promise { + return getGraph().runCypher(query, params); +} + +export function toNeo4jInt(value: number): unknown { + const provider = getGraph(); + if (provider.toNeo4jInt) { + return provider.toNeo4jInt(value); + } + return value; +} diff --git a/packages/graph-db/tsconfig.json b/packages/graph-db/tsconfig.json new file mode 100644 index 0000000..4ed0786 --- /dev/null +++ b/packages/graph-db/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"] +} diff --git a/packages/ingest-business-context/package.json b/packages/ingest-business-context/package.json index da74346..4d60f2b 100644 --- a/packages/ingest-business-context/package.json +++ b/packages/ingest-business-context/package.json @@ -14,10 +14,10 @@ "dependencies": { "@bb/config": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/ingest-github": "workspace:*", "@bb/llm": "workspace:*", "@bb/logger": "workspace:*", - "@bb/neo4j": "workspace:*", "@bb/queue": "workspace:*", "@bb/types": "workspace:*" } diff --git a/packages/ingest-business-context/src/neo4j/indexes.ts b/packages/ingest-business-context/src/neo4j/indexes.ts index 63dd22d..22fade3 100644 --- a/packages/ingest-business-context/src/neo4j/indexes.ts +++ b/packages/ingest-business-context/src/neo4j/indexes.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import { logger } from "@bb/logger"; const INDEX_DEFINITIONS: readonly string[] = [ diff --git a/packages/ingest-business-context/src/neo4j/write-keywords.ts b/packages/ingest-business-context/src/neo4j/write-keywords.ts index 75ea426..8de22a0 100644 --- a/packages/ingest-business-context/src/neo4j/write-keywords.ts +++ b/packages/ingest-business-context/src/neo4j/write-keywords.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import { BUSINESS_CONTEXT_KEYWORD_TYPES } from "#src/neo4j/relationship-types.ts"; import type { BusinessContextAnalysis } from "#src/types.ts"; @@ -45,13 +45,13 @@ export async function createBusinessContextKeywords( continue; } - const rows = await runCypher<{ count: number }>(MERGE_KEYWORDS, { + const rows = (await runCypher(MERGE_KEYWORDS, { keywords: words.map((w) => ({ word: w })), relType, orgId: identity.orgId, nodeId: sanitizedTitle, knowledgeId: identity.knowledgeId, - }); + })) as Array<{ count: number }>; if (rows.length > 0) { total += Number(rows[0]?.count ?? 0); } diff --git a/packages/ingest-business-context/src/neo4j/write-node.ts b/packages/ingest-business-context/src/neo4j/write-node.ts index 232f887..f2b11fe 100644 --- a/packages/ingest-business-context/src/neo4j/write-node.ts +++ b/packages/ingest-business-context/src/neo4j/write-node.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import { serializeArrayForNeo4j } from "#src/neo4j/serialize.ts"; import type { BusinessContextAnalysis } from "#src/types.ts"; @@ -35,7 +35,7 @@ export async function createBusinessContextNode( analysis: BusinessContextAnalysis, sanitizedTitle: string, ): Promise { - const rows = await runCypher<{ count: number }>(MERGE_BUSINESS_CONTEXT, { + const rows = (await runCypher(MERGE_BUSINESS_CONTEXT, { nodeId: sanitizedTitle, knowledgeId: identity.knowledgeId, orgId: identity.orgId, @@ -48,6 +48,6 @@ export async function createBusinessContextNode( keywordsText: serializeArrayForNeo4j(analysis.keywords), domainKeywordsText: serializeArrayForNeo4j(analysis.domain_keywords), updatedAt: new Date().toISOString(), - }); + })) as Array<{ count: number }>; return rows.length > 0 ? Number(rows[0]?.count ?? 0) : 0; } diff --git a/packages/ingest-business-context/src/neo4j/write-version.ts b/packages/ingest-business-context/src/neo4j/write-version.ts index dc9700e..f780161 100644 --- a/packages/ingest-business-context/src/neo4j/write-version.ts +++ b/packages/ingest-business-context/src/neo4j/write-version.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import type { BusinessContextAnalysis } from "#src/types.ts"; export interface BusinessContextVersionIdentity { @@ -41,14 +41,14 @@ export async function createBusinessContextVersionNode( analysis: BusinessContextAnalysis, sanitizedTitle: string, ): Promise { - const rows = await runCypher<{ count: number }>(MERGE_VERSION, { + const rows = (await runCypher(MERGE_VERSION, { nodeId: sanitizedTitle, knowledgeId: identity.knowledgeId, orgId: identity.orgId, commitHash: identity.commitHash, analysisJson: JSON.stringify(analysis), updatedAt: new Date().toISOString(), - }); + })) as Array<{ count: number }>; return rows.length > 0 ? Number(rows[0]?.count ?? 0) : 0; } @@ -62,10 +62,10 @@ export async function linkVersionToFileVersions( identity: BusinessContextVersionIdentity, sanitizedTitle: string, ): Promise { - const rows = await runCypher<{ count: number }>(LINK_TO_FILE_VERSIONS, { + const rows = (await runCypher(LINK_TO_FILE_VERSIONS, { nodeId: sanitizedTitle, knowledgeId: identity.knowledgeId, commitHash: identity.commitHash, - }); + })) as Array<{ count: number }>; return rows.length > 0 ? Number(rows[0]?.count ?? 0) : 0; } diff --git a/packages/ingest-business-context/src/strategy/commit-validator.ts b/packages/ingest-business-context/src/strategy/commit-validator.ts index 983e091..5b88bef 100644 --- a/packages/ingest-business-context/src/strategy/commit-validator.ts +++ b/packages/ingest-business-context/src/strategy/commit-validator.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import { CommitNotIndexedError } from "#src/errors.ts"; const CHECK_INDEXED = ` @@ -29,7 +29,10 @@ export interface CommitIndexStatus { * If both are zero, the commit (or knowledge) is not indexed. */ export async function checkCommitIndexed(knowledgeId: string, commitHash: string): Promise { - const rows = await runCypher<{ versions: number; files: number }>(CHECK_INDEXED, { knowledgeId, commitHash }); + const rows = (await runCypher(CHECK_INDEXED, { knowledgeId, commitHash })) as Array<{ + versions: number; + files: number; + }>; const row = rows[0] ?? { versions: 0, files: 0 }; const fileVersions = Number(row.versions ?? 0); const liveFiles = Number(row.files ?? 0); diff --git a/packages/ingest-github/package.json b/packages/ingest-github/package.json index 936da74..ecae6fb 100644 --- a/packages/ingest-github/package.json +++ b/packages/ingest-github/package.json @@ -13,12 +13,17 @@ }, "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/llm": "workspace:*", "@bb/logger": "workspace:*", "@bb/mongo": "workspace:*", + "@bb/sqlite": "workspace:*", "@bb/neo4j": "workspace:*", "@bb/queue": "workspace:*", + "@bb/db-core": "workspace:*", + "@bb/graph-core": "workspace:*", "@bb/types": "workspace:*" } } diff --git a/packages/ingest-github/src/adapters/llm-file-analyzer.ts b/packages/ingest-github/src/adapters/llm-file-analyzer.ts index 8e42d74..ff64c99 100644 --- a/packages/ingest-github/src/adapters/llm-file-analyzer.ts +++ b/packages/ingest-github/src/adapters/llm-file-analyzer.ts @@ -1,7 +1,7 @@ import { askJsonLLM, type AskLlmOptions } from "@bb/llm"; import { LlmConfigError, LlmError } from "@bb/errors"; import { logger } from "@bb/logger"; -import type { FileAnalysis, FileAnalysisSection } from "@bb/mongo"; +import type { FileAnalysis, FileAnalysisSection } from "@bb/db-core"; import { FALLBACK_LANGUAGE, emptyFileAnalysis } from "#src/types/file-analysis.ts"; import type { AnalyzedFileResult, FileAnalyzer } from "#src/types/pipeline.ts"; diff --git a/packages/ingest-github/src/bootstrap.ts b/packages/ingest-github/src/bootstrap.ts index 69bdda6..583d7db 100644 --- a/packages/ingest-github/src/bootstrap.ts +++ b/packages/ingest-github/src/bootstrap.ts @@ -1,7 +1,11 @@ -import { seedConfig } from "@bb/config"; +import { seedConfig, getConfigValue } from "@bb/config"; import { seedLoggerFactory, type LoggerFactory } from "@bb/logger"; -import { connectMongo } from "@bb/mongo"; -import { connectNeo4j } from "@bb/neo4j"; +import { Config } from "@bb/types"; +import { connectDb } from "@bb/db"; +import { connectGraph } from "@bb/graph-db"; +import "@bb/mongo"; +import "@bb/sqlite"; +import "@bb/neo4j"; export interface BootstrapRuntimeOptions { config: unknown; @@ -11,6 +15,10 @@ export interface BootstrapRuntimeOptions { export async function bootstrapRuntime(opts: BootstrapRuntimeOptions): Promise { seedConfig(opts.config); seedLoggerFactory(opts.loggerFactory); - await connectMongo(); - await connectNeo4j(); + + const dbProvider = getConfigValue(Config.DbProvider); + await connectDb(dbProvider); + + const graphProvider = getConfigValue(Config.GraphProvider); + await connectGraph(graphProvider); } diff --git a/packages/ingest-github/src/index.ts b/packages/ingest-github/src/index.ts index efd5348..23e5eff 100644 --- a/packages/ingest-github/src/index.ts +++ b/packages/ingest-github/src/index.ts @@ -12,7 +12,7 @@ import { } from "./strategies/flat-folder/prompts/file-analysis.ts"; import type { PullFactory, SourceFactory } from "./types/pipeline.ts"; import type { ProgressContextFactory } from "./progress/types.ts"; -import { nullProgressContextFactory } from "./progress/NullProgressReporter.ts"; +import { dbProgressContextFactory } from "./progress/DbProgressReporter.ts"; /** * Optional dependencies for the GitHub workers. Factories are documented in @@ -47,7 +47,7 @@ function buildRunner( } export function registerGithubWorkers(deps: RegisterGithubWorkersDeps = {}): void { - const progressContextFactory = deps.progressContextFactory ?? nullProgressContextFactory; + const progressContextFactory = deps.progressContextFactory ?? dbProgressContextFactory; const runner = buildRunner(deps.sourceFactory, progressContextFactory); // `registerWorker` expects `Promise`; the handler now returns // `Promise` so the enterprise queue bridge can mirror @@ -65,7 +65,7 @@ export function registerGithubWorkers(deps: RegisterGithubWorkersDeps = {}): voi } export function registerLocalIngestWorker(): void { - const runner = buildRunner(undefined, nullProgressContextFactory); + const runner = buildRunner(undefined, dbProgressContextFactory); const localHandler = createLocalIngestHandler({ runner }); registerWorker(JobType.LocalIngest, async (msg) => { await localHandler(msg); @@ -133,3 +133,4 @@ export type { ProgressTotalMode, } from "./progress/types.ts"; export { nullProgressContextFactory } from "./progress/NullProgressReporter.ts"; +export { dbProgressContextFactory } from "./progress/DbProgressReporter.ts"; diff --git a/packages/ingest-github/src/pipeline/pull-helpers.ts b/packages/ingest-github/src/pipeline/pull-helpers.ts new file mode 100644 index 0000000..43adf56 --- /dev/null +++ b/packages/ingest-github/src/pipeline/pull-helpers.ts @@ -0,0 +1,20 @@ +import { KnowledgeState } from "@bb/types"; +import { knowledgeDb } from "@bb/db"; +import { knowledgeGraph } from "@bb/graph-db"; +import type { PipelineSummary } from "#src/types/pipeline.ts"; + +export async function transitionState(knowledgeId: string, state: KnowledgeState): Promise { + await knowledgeDb.setKnowledgeState(knowledgeId, state); + await knowledgeGraph.setKnowledgeStateInGraph(knowledgeId, state).catch(() => undefined); +} + +export function emptyPullSummary(commitHash: string): PipelineSummary { + return { + filesAnalyzed: 0, + foldersSummarised: 0, + repoSummarised: false, + graphNodesWritten: 0, + commitHash, + tokenUsage: { inputTokens: 0, outputTokens: 0, costUsd: 0 }, + }; +} diff --git a/packages/ingest-github/src/pipeline/pull.ts b/packages/ingest-github/src/pipeline/pull.ts index be344a6..062b1bb 100644 --- a/packages/ingest-github/src/pipeline/pull.ts +++ b/packages/ingest-github/src/pipeline/pull.ts @@ -1,12 +1,14 @@ import { Config, KnowledgeState, type GithubPullPayload, type JobMessage } from "@bb/types"; +import type { NodeScope } from "@bb/types"; import { getConfigValue } from "@bb/config"; import { withConcurrency } from "./concurrency.ts"; -import { getKnowledge, markKnowledgeFailed, setKnowledgeCommit, setKnowledgeState } from "@bb/mongo"; -import { setKnowledgeStateInGraph, snapshotFilesToVersion, type NodeScope } from "@bb/neo4j"; +import { knowledgeDb } from "@bb/db"; +import { knowledgeGraph, filesGraph } from "@bb/graph-db"; import type { PipelineSummary } from "#src/types/pipeline.ts"; import { resolveOrgId, llmCallContextFromPayload } from "./context.ts"; import { IngestError, KnowledgeNotFoundError } from "@bb/errors"; import { classifyFailure } from "./failure-classifier.ts"; +import { transitionState, emptyPullSummary } from "./pull-helpers.ts"; import { logger } from "@bb/logger"; import { ensureMetaDirs, metaPathsFor, repoCloneDir, ensureReposRoot } from "./paths.ts"; import { readHeadCommitHash, syncRepository } from "./source.ts"; @@ -50,14 +52,14 @@ export async function runPull( ); } - const knowledge = await getKnowledge(knowledgeId); - if (knowledge === null) { + const kDoc = await knowledgeDb.getKnowledge(knowledgeId); + if (kDoc === null) { throw new KnowledgeNotFoundError(knowledgeId); } - if (knowledge.source.kind !== "github") { - throw new IngestError(knowledgeId, `pull is only supported for github knowledge (kind=${knowledge.source.kind})`); + if (kDoc.source.kind !== "github") { + throw new IngestError(knowledgeId, `pull is only supported for github knowledge (kind=${kDoc.source.kind})`); } - const currentCommit = knowledge.source.commitId ?? ""; + const currentCommit = kDoc.source.commitId ?? ""; if (currentCommit.length === 0) { throw new IngestError( knowledgeId, @@ -65,8 +67,8 @@ export async function runPull( ); } - const branch = knowledge.info.branch ?? "main"; - const repoUrl = knowledge.info.repoUrl; + const branch = kDoc.info.branch ?? "main"; + const repoUrl = kDoc.info.repoUrl; if (repoUrl === undefined || repoUrl.length === 0) { throw new IngestError(knowledgeId, "pull requires knowledge.info.repoUrl"); } @@ -139,7 +141,7 @@ export async function runPull( } throwIfCancelled(knowledgeId); - await snapshotFilesToVersion({ knowledgeId, commitHash: currentCommit }).catch((cause: unknown) => { + await filesGraph.snapshotFilesToVersion({ knowledgeId, commitHash: currentCommit }).catch((cause: unknown) => { const msgText = cause instanceof Error ? cause.message : String(cause); logger.warn(`pull: snapshot of ${currentCommit.slice(0, 12)} failed (non-fatal): ${msgText}`); }); @@ -224,7 +226,7 @@ export async function runPull( progressContext.phaseChanged("indexing"); logger.info(`pull: phase repo summary starting`); throwIfCancelled(knowledgeId); - const orgId = resolveOrgId({ ...(knowledge.source.kind === "github" ? {} : {}) }); + const orgId = resolveOrgId({ ...(kDoc.source.kind === "github" ? {} : {}) }); const scope: NodeScope = { orgId, knowledgeId, repoId: knowledgeId }; const { summary: repoSummary, tokenUsage: repoUsage } = await summariseRepo(knowledgeId, metaPaths, llmCallContext); totalInputTokens += repoUsage.inputTokens; @@ -245,7 +247,7 @@ export async function runPull( affectedFolders, }); - await setKnowledgeCommit( + await knowledgeDb.setKnowledgeCommit( knowledgeId, targetCommit, String(totalInputTokens), @@ -272,25 +274,9 @@ export async function runPull( throw cause; } const { category, reason, detail } = classifyFailure(cause); - await markKnowledgeFailed(knowledgeId, reason, category, detail).catch(() => undefined); - await setKnowledgeStateInGraph(knowledgeId, KnowledgeState.Failed).catch(() => undefined); + await knowledgeDb.markKnowledgeFailed(knowledgeId, reason, category, detail).catch(() => undefined); + await knowledgeGraph.setKnowledgeStateInGraph(knowledgeId, KnowledgeState.Failed).catch(() => undefined); progressContext.failed(reason, undefined, category, detail); throw new IngestError(knowledgeId, `github_pull failed: ${reason}`, cause); } } - -async function transitionState(knowledgeId: string, state: KnowledgeState): Promise { - await setKnowledgeState(knowledgeId, state); - await setKnowledgeStateInGraph(knowledgeId, state).catch(() => undefined); -} - -function emptyPullSummary(commitHash: string): PipelineSummary { - return { - filesAnalyzed: 0, - foldersSummarised: 0, - repoSummarised: false, - graphNodesWritten: 0, - commitHash, - tokenUsage: { inputTokens: 0, outputTokens: 0, costUsd: 0 }, - }; -} diff --git a/packages/ingest-github/src/pipeline/run.ts b/packages/ingest-github/src/pipeline/run.ts index eca725e..256d4f7 100644 --- a/packages/ingest-github/src/pipeline/run.ts +++ b/packages/ingest-github/src/pipeline/run.ts @@ -4,8 +4,8 @@ import { type KnowledgeFailureCategory, type LocalIngestPayload, } from "@bb/types"; -import { markKnowledgeFailed, setKnowledgeBranch, setKnowledgeCommit, setKnowledgeState } from "@bb/mongo"; -import { setKnowledgeBranchInGraph, setKnowledgeStateInGraph } from "@bb/neo4j"; +import { knowledgeDb } from "@bb/db"; +import { knowledgeGraph } from "@bb/graph-db"; import { IngestError } from "@bb/errors"; import { logger } from "@bb/logger"; import { classifyFailure } from "./failure-classifier.ts"; @@ -70,8 +70,8 @@ async function runGithub( try { throwIfCancelled(knowledgeId); const branch = await resolveBranch(knowledgeId, payload, payload.gitToken); - await setKnowledgeBranch(knowledgeId, branch); - await setKnowledgeBranchInGraph(knowledgeId, branch).catch(() => undefined); + await knowledgeDb.setKnowledgeBranch(knowledgeId, branch); + await knowledgeGraph.setKnowledgeBranchInGraph(knowledgeId, branch).catch(() => undefined); let source: SourceReader; let archiveSink: ArchiveSink | undefined; @@ -129,7 +129,7 @@ async function runGithub( strategyStarted = true; const result = await strategy.execute(strategyInput); - await setKnowledgeCommit( + await knowledgeDb.setKnowledgeCommit( knowledgeId, commitHash, String(result.tokenUsage.inputTokens), @@ -211,8 +211,8 @@ async function runLocal(strategy: IngestStrategy, payload: LocalIngestPayload): } async function transitionState(knowledgeId: string, state: KnowledgeState): Promise { - await setKnowledgeState(knowledgeId, state); - await setKnowledgeStateInGraph(knowledgeId, state).catch(() => undefined); + await knowledgeDb.setKnowledgeState(knowledgeId, state); + await knowledgeGraph.setKnowledgeStateInGraph(knowledgeId, state).catch(() => undefined); } /** @@ -226,8 +226,8 @@ async function persistFailure( reason: string, detail?: string, ): Promise { - await markKnowledgeFailed(knowledgeId, reason, category, detail).catch(() => undefined); - await setKnowledgeStateInGraph(knowledgeId, KnowledgeState.Failed).catch(() => undefined); + await knowledgeDb.markKnowledgeFailed(knowledgeId, reason, category, detail).catch(() => undefined); + await knowledgeGraph.setKnowledgeStateInGraph(knowledgeId, KnowledgeState.Failed).catch(() => undefined); } function isGithubPayload(payload: GithubIndexPayload | LocalIngestPayload): payload is GithubIndexPayload { diff --git a/packages/ingest-github/src/progress/DbProgressReporter.ts b/packages/ingest-github/src/progress/DbProgressReporter.ts new file mode 100644 index 0000000..e54d8a9 --- /dev/null +++ b/packages/ingest-github/src/progress/DbProgressReporter.ts @@ -0,0 +1,65 @@ +import { knowledgeDb } from "@bb/db"; +import type { + ProgressContext, + ProgressContextFactory, + ProgressPhase, + ProgressReporter, + ProgressReporterInput, +} from "./types.ts"; + +class DbProgressContext implements ProgressContext { + private total = 0; + private processed = 0; + private lastUpdate = 0; + + constructor(private knowledgeId: string) {} + + reporter(input: ProgressReporterInput): ProgressReporter { + const isFileAnalysis = + input.phase === "file_analysis" && + (input.subPhase === "analyse_small" || input.subPhase === "big_files_condense"); + + return { + start: async () => { + if (isFileAnalysis && input.total.kind === "fixed") { + this.total += input.total.total; + await knowledgeDb.updateKnowledgeProgress(this.knowledgeId, this.processed, this.total); + } + }, + increment: (delta = 1) => { + if (isFileAnalysis) { + this.processed += delta; + const now = Date.now(); + if (now - this.lastUpdate > 250 || this.processed >= this.total) { + this.lastUpdate = now; + knowledgeDb.updateKnowledgeProgress(this.knowledgeId, this.processed, this.total).catch(() => {}); + } + } + }, + incrementSeen: () => {}, + setTotal: (total) => { + if (isFileAnalysis) { + this.total = total; + knowledgeDb.updateKnowledgeProgress(this.knowledgeId, this.processed, this.total).catch(() => {}); + } + }, + stop: () => {}, + }; + } + + phaseChanged(phase: ProgressPhase) { + if (phase === "clone" || phase === "scan") { + knowledgeDb.updateKnowledgeProgress(this.knowledgeId, 0, undefined).catch(() => {}); + } + } + + completed() { + knowledgeDb.updateKnowledgeProgress(this.knowledgeId, this.total, this.total).catch(() => {}); + } + + failed() {} +} + +export const dbProgressContextFactory: ProgressContextFactory = (knowledgeId: string) => { + return new DbProgressContext(knowledgeId); +}; diff --git a/packages/ingest-github/src/strategies/flat-folder/README.md b/packages/ingest-github/src/strategies/flat-folder/README.md index 78d8acf..b391d3c 100644 --- a/packages/ingest-github/src/strategies/flat-folder/README.md +++ b/packages/ingest-github/src/strategies/flat-folder/README.md @@ -21,6 +21,14 @@ this single pool. One knob bounds total in-flight LLM concurrency. **two-pass** strategy: walk + cache-only `decideStatic` first, then parallel-deduplicated LLM resolution for unknown extensions/filenames through the shared limiter, then drain. + 1b. **write-eligible-files** (`eligible-files.ts`) — between scan and the + 2a/2b parallel block, persists `.bytebell/eligible_files.json` (paths + + parent folders for every `small`/`big` entry, plus the commit hash) to + the source layer (local disk under `source.localRepoDir/.bytebell/` and/or + the `archiveSink`). Read back by `@bytebell/knowledge-validation` to + verify every file the analyzer was asked to process landed in Neo4j. + Hard-fails if neither write target is available — an un-validatable + knowledge is not a state we want. 2a. **analyse-small** (`phases/analyse-small.ts`) — reads the manifest's `kind: "small"` entries, re-opens content, runs the LLM file-analyser per file under the shared limiter, writes `CondensedFileAnalysis` JSON. @@ -104,6 +112,7 @@ The strategy emits progress through the `ProgressContext` port defined in see updated entries without re-reading disk. - `scan-manifest.ts` — `ScanManifest` shape, `readScanManifest`, `writeScanManifest`. The canonical handoff between phase 1 and phases 2a/2b. +- `eligible-files.ts` — `writeEligibleFiles({knowledgeId, manifest, source, archiveSink?})`. Writes `.bytebell/eligible_files.json` to the source layer between phase 1 and 2a/2b. The validation service (`@bytebell/knowledge-validation`) reads this artifact to cross-check post-indexing consistency. - `folder-path.ts` — `directFolderOf`, `affectedFolderPaths`. - `folder-summary.ts` — group + summarise (individual or batched) + persist - iterate folder summaries; shared `dispatchFolderSummaries` used by both diff --git a/packages/ingest-github/src/strategies/flat-folder/backfill/fields.ts b/packages/ingest-github/src/strategies/flat-folder/backfill/fields.ts index 9effedb..57b16e0 100644 --- a/packages/ingest-github/src/strategies/flat-folder/backfill/fields.ts +++ b/packages/ingest-github/src/strategies/flat-folder/backfill/fields.ts @@ -1,7 +1,7 @@ import { askJsonLLM, type AskLlmOptions } from "@bb/llm"; import { LlmConfigError, LlmError } from "@bb/errors"; import { logger } from "@bb/logger"; -import type { FileAnalysis, FileAnalysisSection } from "@bb/mongo"; +import type { FileAnalysis, FileAnalysisSection } from "@bb/db-core"; import type { MetaPaths } from "#src/types/meta-paths.ts"; import type { ProgressContext } from "#src/progress/types.ts"; import type { ConcurrencyLimiter } from "#src/pipeline/concurrency.ts"; diff --git a/packages/ingest-github/src/strategies/flat-folder/eligible-files.ts b/packages/ingest-github/src/strategies/flat-folder/eligible-files.ts new file mode 100644 index 0000000..0205889 --- /dev/null +++ b/packages/ingest-github/src/strategies/flat-folder/eligible-files.ts @@ -0,0 +1,75 @@ +import { mkdir, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { logger } from "@bb/logger"; +import type { ArchiveSink, SourceReader } from "#src/types/pipeline.ts"; +import { affectedFolderPaths } from "./folder-path.ts"; +import type { ScanManifest } from "./scan-manifest.ts"; + +export const ELIGIBLE_FILES_RELATIVE_PATH = ".bytebell/eligible_files.json"; + +export interface EligibleFilesDocument { + knowledgeId: string; + commitHash: string; + generatedAt: string; + files: string[]; + folders: string[]; +} + +export interface WriteEligibleFilesInput { + knowledgeId: string; + manifest: ScanManifest; + source: SourceReader; + archiveSink?: ArchiveSink; +} + +/** + * Persist the canonical list of files the analyzer is about to process, + * BEFORE any small-file or big-file LLM call runs. The downstream + * `@bytebell/knowledge-validation` service reads this artifact via the same + * source layer to cross-check that every eligible file landed in Neo4j. + * + * Writes to whichever source layer is active: local disk when the source + * reader is disk-backed (`source.localRepoDir !== ""`), the archive sink + * otherwise. Fails the strategy if neither target is available, since a + * successfully-indexed but un-validatable knowledge is not a state we want. + */ +export async function writeEligibleFiles(input: WriteEligibleFilesInput): Promise { + const files = input.manifest.entries + .filter((entry) => entry.kind === "small" || entry.kind === "big") + .map((entry) => entry.relativePath) + .sort(); + const folders = affectedFolderPaths(files); + const doc: EligibleFilesDocument = { + knowledgeId: input.knowledgeId, + commitHash: input.source.commitHash, + generatedAt: new Date().toISOString(), + files, + folders, + }; + const content = JSON.stringify(doc, null, 2); + + let wrote = false; + if (input.source.localRepoDir.length > 0) { + const targetDir = path.join(input.source.localRepoDir, ".bytebell"); + const targetFile = path.join(targetDir, "eligible_files.json"); + await mkdir(targetDir, { recursive: true }); + await writeFile(targetFile, content, "utf8"); + wrote = true; + } + if (input.archiveSink !== undefined) { + await input.archiveSink.push({ + knowledgeId: input.knowledgeId, + relativePath: ELIGIBLE_FILES_RELATIVE_PATH, + content, + }); + wrote = true; + } + if (!wrote) { + throw new Error( + `flat-folder: cannot persist eligible_files.json for ${input.knowledgeId}: source reader has no localRepoDir and no archiveSink is configured`, + ); + } + logger.info( + `flat-folder: persisted eligible_files.json for ${input.knowledgeId} (files=${String(files.length)} folders=${String(folders.length)})`, + ); +} diff --git a/packages/ingest-github/src/strategies/flat-folder/index.ts b/packages/ingest-github/src/strategies/flat-folder/index.ts index 86797a6..4fcd091 100644 --- a/packages/ingest-github/src/strategies/flat-folder/index.ts +++ b/packages/ingest-github/src/strategies/flat-folder/index.ts @@ -9,6 +9,7 @@ import { withConcurrency } from "#src/pipeline/concurrency.ts"; import { scanAndClassify } from "./phases/scan-and-classify.ts"; import { analyseSmallFiles } from "./phases/analyse-small.ts"; import { analyseBigFiles } from "./phases/analyse-big-files.ts"; +import { writeEligibleFiles } from "./eligible-files.ts"; import { backfillMissingFields } from "./backfill/fields.ts"; import { FileAnalysisCache } from "./file-analysis-cache.ts"; import { runFolderSummaryPhase } from "./folder-summary.ts"; @@ -52,6 +53,22 @@ export function createFlatFolderStrategy(deps: FlatFolderStrategyDeps): IngestSt } const { manifest } = await scanAndClassify(scanInput); + // Persist the canonical eligible-files list BEFORE any small- or + // big-file LLM call runs. Read back by `@bytebell/knowledge-validation` + // to verify every file the analyzer was asked to process landed in + // Neo4j. Must be the last step before analysis dispatch — if this + // fails, the knowledge is not validatable post-hoc and we'd rather + // fail the run than ship an un-checkable index. + const eligibleInput: Parameters[0] = { + knowledgeId, + manifest, + source, + }; + if (archiveSink !== undefined) { + eligibleInput.archiveSink = archiveSink; + } + await writeEligibleFiles(eligibleInput); + progressContext.phaseChanged("file_analysis"); logger.info( `flat-folder: phase2 (analyse small ${manifest.summary.smallCount} + big ${manifest.summary.bigCount}) starting in parallel`, diff --git a/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts b/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts index 7db4433..19913fb 100644 --- a/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts +++ b/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts @@ -1,17 +1,10 @@ import { readFile } from "node:fs/promises"; -import { Config } from "@bb/types"; +import { Config, type UpsertFolderNodeInput, type UpsertFileNodeInput } from "@bb/types"; import { getConfigValue } from "@bb/config"; import { logger } from "@bb/logger"; -import { - ensureFlatFolderIndexes, - upsertFileNodesBatch, - upsertFolderNodesBatch, - upsertRepoNode, - type NodeScope, - type UpsertFileNodeInput, - type UpsertFolderNodeInput, -} from "@bb/neo4j"; +import { repoGraph, indexesGraph, foldersGraph, filesGraph } from "@bb/graph-db"; import type { GithubIndexPayload } from "@bb/types"; +import type { NodeScope } from "@bb/graph-core"; import type { MetaPaths } from "#src/types/meta-paths.ts"; import { throwIfCancelled } from "#src/pipeline/cancellation.ts"; import type { FileAnalysisCache } from "#src/strategies/flat-folder/file-analysis-cache.ts"; @@ -38,7 +31,7 @@ export interface StoreFlatAnalysisResult { export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise { throwIfCancelled(input.scope.knowledgeId); - await ensureFlatFolderIndexes(); + await indexesGraph.ensureFlatFolderIndexes(); const batchSize = getConfigValue(Config.Neo4jBatchSize); @@ -46,7 +39,7 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< let nodesWritten = 0; const repoSummary = await readRepoSummary(input.metaPaths); if (repoSummary !== null) { - await upsertRepoNode({ + await repoGraph.upsertRepoNode({ scope: input.scope, repoUrl: input.payload.repoUrl, branch: input.branch, @@ -62,7 +55,7 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< }); } else { logger.warn(`phase7: no repo summary on disk; writing :Repo with empty summary`); - await upsertRepoNode({ + await repoGraph.upsertRepoNode({ scope: input.scope, repoUrl: input.payload.repoUrl, branch: input.branch, @@ -124,7 +117,7 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< for (let i = 0; i < folderInputs.length; i += batchSize) { throwIfCancelled(input.scope.knowledgeId); const batch = folderInputs.slice(i, i + batchSize); - await upsertFolderNodesBatch(batch); + await foldersGraph.upsertFolderNodesBatch(batch); foldersWritten += batch.length; nodesWritten += batch.length; for (const item of batch) { @@ -156,7 +149,7 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< for (let i = 0; i < fileInputs.length; i += batchSize) { throwIfCancelled(input.scope.knowledgeId); const batch = fileInputs.slice(i, i + batchSize); - await upsertFileNodesBatch(batch); + await filesGraph.upsertFileNodesBatch(batch); filesWritten += batch.length; nodesWritten += batch.length; for (const item of batch) { diff --git a/packages/ingest-github/src/strategies/flat-folder/store-pull.ts b/packages/ingest-github/src/strategies/flat-folder/store-pull.ts index d070c42..6b2d23e 100644 --- a/packages/ingest-github/src/strategies/flat-folder/store-pull.ts +++ b/packages/ingest-github/src/strategies/flat-folder/store-pull.ts @@ -1,15 +1,9 @@ import { readFile } from "node:fs/promises"; import { logger } from "@bb/logger"; -import { - deleteFileNodes, - ensureFlatFolderIndexes, - upsertFileNode, - upsertFolderNode, - upsertRepoNode, - type NodeScope, -} from "@bb/neo4j"; -import { deleteRawFiles } from "@bb/mongo"; +import { filesGraph, foldersGraph, repoGraph, indexesGraph } from "@bb/graph-db"; +import { rawDb } from "@bb/db"; import type { GithubIndexPayload } from "@bb/types"; +import type { NodeScope } from "@bb/graph-core"; import type { MetaPaths } from "#src/types/meta-paths.ts"; import type { CondensedFileAnalysis } from "#src/types/condensed-file-analysis.ts"; import { throwIfCancelled } from "#src/pipeline/cancellation.ts"; @@ -50,7 +44,7 @@ export interface StorePullResult { */ export async function storePullAnalysis(input: StorePullInput): Promise { throwIfCancelled(input.scope.knowledgeId); - await ensureFlatFolderIndexes(); + await indexesGraph.ensureFlatFolderIndexes(); let filesUpserted = 0; let filesDeleted = 0; @@ -58,15 +52,15 @@ export async function storePullAnalysis(input: StorePullInput): Promise r.oldPath)]; if (deletedPaths.length > 0) { - await deleteFileNodes(input.scope.knowledgeId, deletedPaths); - await deleteRawFiles(input.scope.knowledgeId, deletedPaths); + await filesGraph.deleteFileNodes(input.scope.knowledgeId, deletedPaths); + await rawDb.deleteRawFiles(input.scope.knowledgeId, deletedPaths); filesDeleted = deletedPaths.length; } const repoSummary = await readRepoSummary(input.metaPaths); let repoUpserted = false; if (repoSummary !== null) { - await upsertRepoNode({ + await repoGraph.upsertRepoNode({ scope: input.scope, repoUrl: input.payload.repoUrl, branch: input.branch, @@ -91,7 +85,7 @@ export async function storePullAnalysis(input: StorePullInput): Promise { - await upsertFileNode({ + await filesGraph.upsertFileNode({ orgId: scope.orgId, knowledgeId: scope.knowledgeId, repoId: scope.repoId, diff --git a/packages/ingest-github/src/types/big-file.ts b/packages/ingest-github/src/types/big-file.ts index 4d73838..8e7dd80 100644 --- a/packages/ingest-github/src/types/big-file.ts +++ b/packages/ingest-github/src/types/big-file.ts @@ -1,4 +1,4 @@ -import type { FileAnalysis } from "@bb/mongo"; +import type { FileAnalysis } from "@bb/db-core"; export type BigFileReason = "context-window-exceeded" | "too-large"; diff --git a/packages/ingest-github/src/types/condensed-file-analysis.ts b/packages/ingest-github/src/types/condensed-file-analysis.ts index eeee56d..00f8dcc 100644 --- a/packages/ingest-github/src/types/condensed-file-analysis.ts +++ b/packages/ingest-github/src/types/condensed-file-analysis.ts @@ -1,4 +1,4 @@ -import type { FileAnalysis } from "@bb/mongo"; +import type { FileAnalysis } from "@bb/db-core"; export interface CondensedFileAnalysis { relativePath: string; diff --git a/packages/ingest-github/src/types/file-analysis.ts b/packages/ingest-github/src/types/file-analysis.ts index 7467c64..6599e2b 100644 --- a/packages/ingest-github/src/types/file-analysis.ts +++ b/packages/ingest-github/src/types/file-analysis.ts @@ -1,4 +1,4 @@ -import type { FileAnalysis } from "@bb/mongo"; +import type { FileAnalysis } from "@bb/db-core"; export const FALLBACK_LANGUAGE = "unknown"; diff --git a/packages/ingest-github/src/types/pipeline.ts b/packages/ingest-github/src/types/pipeline.ts index aaf13a5..c5629e6 100644 --- a/packages/ingest-github/src/types/pipeline.ts +++ b/packages/ingest-github/src/types/pipeline.ts @@ -1,6 +1,6 @@ import type { GithubIndexPayload, GithubPullPayload } from "@bb/types"; import type { AskLlmOptions } from "@bb/llm"; -import type { FileAnalysis } from "@bb/mongo"; +import type { FileAnalysis } from "@bb/db-core"; import type { ConcurrencyLimiter } from "#src/pipeline/concurrency.ts"; import type { DiffResult } from "#src/pipeline/git-diff.ts"; diff --git a/packages/llm/package.json b/packages/llm/package.json index 32be323..4a2f8ad 100644 --- a/packages/llm/package.json +++ b/packages/llm/package.json @@ -15,7 +15,7 @@ "@bb/config": "workspace:*", "@bb/errors": "workspace:*", "@bb/logger": "workspace:*", - "@bb/mongo": "workspace:*", + "@bb/db": "workspace:*", "@bb/types": "workspace:*", "tiktoken": "^1.0.22" } diff --git a/packages/llm/src/usageTracker.ts b/packages/llm/src/usageTracker.ts index 2234d82..54476aa 100644 --- a/packages/llm/src/usageTracker.ts +++ b/packages/llm/src/usageTracker.ts @@ -1,5 +1,5 @@ import type { ActivityInput } from "@bb/types"; -import { incrementUsage, recordActivity } from "@bb/mongo"; +import { usageDb, activityDb } from "@bb/db"; import { tokenLen } from "./tokenizer.ts"; /** @@ -30,7 +30,7 @@ export class UsageTracker { const outputTokens = tokenLen(response); // 1. Increment monthly usage (Atomic update) - await incrementUsage(identityId, inputTokens, outputTokens); + await usageDb.incrementUsage(identityId, inputTokens, outputTokens); // 2. Record detailed activity log const activity: ActivityInput = { @@ -44,7 +44,7 @@ export class UsageTracker { output: outputTokens, }, }; - await recordActivity(activity); + await activityDb.recordActivity(activity); } catch (error) { // Failure in tracking should not break the main application flow console.error("[UsageTracker] Failed to track usage:", error); diff --git a/packages/mcp/package.json b/packages/mcp/package.json index 98fc5a5..0034438 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -15,7 +15,7 @@ "@bb/config": "workspace:*", "@bb/logger": "workspace:*", "@bb/llm": "workspace:*", - "@bb/neo4j": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/types": "workspace:*", "@modelcontextprotocol/sdk": "^1.23.0", "zod": "^4.3.6" diff --git a/packages/mcp/src/keywordLookupTool.ts b/packages/mcp/src/keywordLookupTool.ts index 6ca4485..248d66e 100644 --- a/packages/mcp/src/keywordLookupTool.ts +++ b/packages/mcp/src/keywordLookupTool.ts @@ -1,6 +1,6 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; -import { runCypher, toNeo4jInt } from "@bb/neo4j"; +import { runCypher, toNeo4jInt } from "@bb/graph-db"; import { UsageTracker } from "@bb/llm"; import { buildFulltextQuery, escapeLucene } from "./smartSearchChannels.ts"; @@ -167,7 +167,7 @@ async function runMatchQuery(args: MatchQueryArgs): Promise { } else { params["fulltextQuery"] = buildFulltextQuery([escapeLucene(lower)]); } - return runCypher(cypher, params); + return (await runCypher(cypher, params)) as RowShape[]; } function cypherForMatch(match: MatchMode): string { diff --git a/packages/mcp/src/listKnowledgeTool.ts b/packages/mcp/src/listKnowledgeTool.ts index 88d8b16..08579a7 100644 --- a/packages/mcp/src/listKnowledgeTool.ts +++ b/packages/mcp/src/listKnowledgeTool.ts @@ -1,7 +1,7 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { UsageTracker } from "@bb/llm"; -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; const MAX_PAGE_CHARS = 20_000; @@ -116,7 +116,7 @@ async function fetchAllRows(): Promise { fileCount ORDER BY k.updatedAt DESC `; - const raw = await runCypher(cypher, {}); + const raw = (await runCypher(cypher, {})) as RawRow[]; return raw.map(coerceRow); } diff --git a/packages/mcp/src/retrieveFileMetadata.ts b/packages/mcp/src/retrieveFileMetadata.ts index 499dbae..e91d6ee 100644 --- a/packages/mcp/src/retrieveFileMetadata.ts +++ b/packages/mcp/src/retrieveFileMetadata.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; export interface FileMetadata { path: string; @@ -48,7 +48,7 @@ export async function fetchMetadata(knowledgeId: string, relativePaths: readonly notFound: [], }; } - const rows = await runCypher( + const rows = (await runCypher( ` MATCH (f:File) WHERE f.knowledgeId = $knowledgeId AND f.relativePath IN $paths @@ -70,7 +70,7 @@ export async function fetchMetadata(knowledgeId: string, relativePaths: readonly collect(DISTINCT me.name) AS importsExternal `, { knowledgeId, paths: relativePaths }, - ); + )) as RowShape[]; const files = rows.map(rowToMetadata); const found = new Set(files.map((file) => file.path)); const notFound = relativePaths.filter((p) => !found.has(p)); diff --git a/packages/mcp/src/smartSearchChannels.ts b/packages/mcp/src/smartSearchChannels.ts index 65a39a7..64f7f5e 100644 --- a/packages/mcp/src/smartSearchChannels.ts +++ b/packages/mcp/src/smartSearchChannels.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import { EXCLUSION_WHERE } from "./searchExclusions.ts"; export interface ScoredHit { @@ -48,7 +48,7 @@ async function chPurpose(params: SearchParams): Promise { WITH f, score ORDER BY score DESC LIMIT $resultCap ${COLLECT_RETURN} `; - return toScoredHits(await runCypher(cypher, params)); + return toScoredHits((await runCypher(cypher, params)) as RowShape[]); } async function chPaths(params: SearchParams): Promise { @@ -72,7 +72,7 @@ async function chPaths(params: SearchParams): Promise { ORDER BY score DESC, f.relativePath LIMIT $resultCap ${COLLECT_RETURN} `; - return toScoredHits(await runCypher(cypher, params)); + return toScoredHits((await runCypher(cypher, params)) as RowShape[]); } async function chKeywords(params: SearchParams): Promise { @@ -85,7 +85,7 @@ async function chKeywords(params: SearchParams): Promise { ORDER BY score DESC LIMIT $resultCap ${COLLECT_RETURN} `; - return toScoredHits(await runCypher(cypher, params)); + return toScoredHits((await runCypher(cypher, params)) as RowShape[]); } async function chClasses(params: SearchParams): Promise { @@ -111,7 +111,7 @@ async function symbolChannel( ORDER BY score DESC LIMIT $resultCap ${COLLECT_RETURN} `; - return toScoredHits(await runCypher(cypher, params)); + return toScoredHits((await runCypher(cypher, params)) as RowShape[]); } async function chImportsInternal(params: SearchParams): Promise { @@ -135,7 +135,7 @@ async function importsChannel( ORDER BY f.relativePath LIMIT $resultCap ${COLLECT_RETURN} `; - return toScoredHits(await runCypher(cypher, params)); + return toScoredHits((await runCypher(cypher, params)) as RowShape[]); } async function chBusinessContext(params: SearchParams): Promise { @@ -146,7 +146,7 @@ async function chBusinessContext(params: SearchParams): Promise { WITH f, score ORDER BY score DESC LIMIT $resultCap ${COLLECT_RETURN} `; - return toScoredHits(await runCypher(cypher, params)); + return toScoredHits((await runCypher(cypher, params)) as RowShape[]); } export type ChannelName = diff --git a/packages/mcp/src/smartSearchFusion.ts b/packages/mcp/src/smartSearchFusion.ts index 74677e8..a7a1147 100644 --- a/packages/mcp/src/smartSearchFusion.ts +++ b/packages/mcp/src/smartSearchFusion.ts @@ -1,4 +1,4 @@ -import { runCypher } from "@bb/neo4j"; +import { runCypher } from "@bb/graph-db"; import type { ChannelName, ScoredHit } from "./smartSearchChannels.ts"; export interface FusedResult { @@ -78,11 +78,11 @@ export async function attachRepoNames(results: FusedResult[]): Promise { if (ids.length === 0) { return; } - const rows = await runCypher<{ knowledgeId: string; repoName: string | null }>( + const rows = (await runCypher( `MATCH (k:Knowledge) WHERE k.knowledgeId IN $ids RETURN k.knowledgeId AS knowledgeId, k.repoName AS repoName`, { ids }, - ); + )) as Array<{ knowledgeId: string; repoName: string | null }>; const lookup = new Map(rows.map((row) => [row.knowledgeId, row.repoName ?? ""])); for (const result of results) { result.repo_name = lookup.get(result.knowledge_id) ?? ""; diff --git a/packages/mcp/src/smartSearchTool.ts b/packages/mcp/src/smartSearchTool.ts index a001bd9..23c4efd 100644 --- a/packages/mcp/src/smartSearchTool.ts +++ b/packages/mcp/src/smartSearchTool.ts @@ -1,7 +1,7 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; import { UsageTracker } from "@bb/llm"; -import { toNeo4jInt } from "@bb/neo4j"; +import { toNeo4jInt } from "@bb/graph-db"; import { getLogger } from "@bb/logger"; import { CHANNEL_RUNNERS, diff --git a/packages/mongo/package.json b/packages/mongo/package.json index 54f058a..a68ed56 100644 --- a/packages/mongo/package.json +++ b/packages/mongo/package.json @@ -13,7 +13,9 @@ }, "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", + "@bb/db-core": "workspace:*", "@bb/types": "workspace:*", "mongodb": "^7.2.0" } diff --git a/packages/mongo/src/index.ts b/packages/mongo/src/index.ts index bcee0b0..80bc4f2 100644 --- a/packages/mongo/src/index.ts +++ b/packages/mongo/src/index.ts @@ -1,3 +1,5 @@ +import "./provider.ts"; + export { connectMongo, closeMongo, pingMongo } from "./client.ts"; export type { PingResult } from "./client.ts"; diff --git a/packages/mongo/src/provider.ts b/packages/mongo/src/provider.ts new file mode 100644 index 0000000..41fea8c --- /dev/null +++ b/packages/mongo/src/provider.ts @@ -0,0 +1,57 @@ +import { connectMongo, closeMongo, pingMongo } from "./client.ts"; +import * as knowledgeRepo from "./knowledge.ts"; +import * as rawRepo from "./raw.ts"; +import * as statsRepo from "./aggregateStats.ts"; +import * as activityRepo from "./activity.ts"; +import * as usageRepo from "./usage.ts"; + +import { registerDbProvider } from "@bb/db"; +import type { IDocumentDatabaseProvider } from "@bb/db-core"; + +class MongoDatabaseProvider implements IDocumentDatabaseProvider { + knowledge = { + setKnowledgeState: knowledgeRepo.setKnowledgeState, + setKnowledgeCommit: knowledgeRepo.setKnowledgeCommit, + setKnowledgeBranch: knowledgeRepo.setKnowledgeBranch, + updateKnowledgeProgress: knowledgeRepo.updateKnowledgeProgress, + upsertKnowledge: knowledgeRepo.upsertKnowledge, + deleteKnowledge: knowledgeRepo.deleteKnowledge, + listKnowledge: knowledgeRepo.listKnowledge, + getKnowledge: knowledgeRepo.getKnowledge, + markKnowledgeFailed: knowledgeRepo.markKnowledgeFailed, + }; + + raw = { + upsertRawFile: rawRepo.upsertRawFile, + listRawFileShas: rawRepo.listRawFileShas, + deleteRawFiles: rawRepo.deleteRawFiles, + }; + + stats = { + aggregateStats: statsRepo.aggregateStats, + }; + + activity = { + recordActivity: activityRepo.recordActivity, + }; + + usage = { + incrementUsage: usageRepo.incrementUsage, + getMonthlyUsage: usageRepo.getMonthlyUsage, + getGlobalUsage: usageRepo.getGlobalUsage, + }; + + async connect(): Promise { + await connectMongo(); + } + + async close(): Promise { + await closeMongo(); + } + + async ping() { + return pingMongo(); + } +} + +registerDbProvider("mongo", () => new MongoDatabaseProvider()); diff --git a/packages/neo4j/package.json b/packages/neo4j/package.json index 7e16617..707f06a 100644 --- a/packages/neo4j/package.json +++ b/packages/neo4j/package.json @@ -14,6 +14,8 @@ "dependencies": { "@bb/config": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", + "@bb/graph-core": "workspace:*", "@bb/types": "workspace:*", "@bb/mongo": "workspace:*", "neo4j-driver": "^6.0.1" diff --git a/packages/neo4j/src/index.ts b/packages/neo4j/src/index.ts index c581c80..7336d85 100644 --- a/packages/neo4j/src/index.ts +++ b/packages/neo4j/src/index.ts @@ -1,3 +1,5 @@ +import "./provider.ts"; + export { connectNeo4j, closeNeo4j, pingNeo4j } from "./client.ts"; export { _runCypher as runCypher, toNeo4jInt } from "./client.ts"; export type { PingResult } from "./client.ts"; diff --git a/packages/neo4j/src/knowledge.ts b/packages/neo4j/src/knowledge.ts index 8721817..2536987 100644 --- a/packages/neo4j/src/knowledge.ts +++ b/packages/neo4j/src/knowledge.ts @@ -28,6 +28,16 @@ MATCH (f:File {knowledgeId: $knowledgeId}) DETACH DELETE f `; +const DELETE_REPOS_BY_KNOWLEDGE = ` +MATCH (r:Repo {knowledgeId: $knowledgeId}) +DETACH DELETE r +`; + +const DELETE_FOLDERS_BY_KNOWLEDGE = ` +MATCH (folder:Folder {knowledgeId: $knowledgeId}) +DETACH DELETE folder +`; + const DELETE_KNOWLEDGE_NODE = ` MATCH (k:Knowledge {knowledgeId: $knowledgeId}) DETACH DELETE k @@ -52,7 +62,7 @@ const DELETE_ORPHAN_ENTITIES = ` MATCH (n) WHERE (n:Keyword OR n:Class OR n:Function OR n:Module) AND NOT EXISTS { MATCH (:File)-[]->(n) } -DELETE n +DETACH DELETE n `; export async function upsertKnowledgeNode(doc: KnowledgeDoc): Promise { @@ -89,6 +99,8 @@ export async function setKnowledgeBranchInGraph(knowledgeId: string, branch: str export async function deleteKnowledgeGraph(knowledgeId: string): Promise { await _runCypher(DELETE_FILES_BY_KNOWLEDGE, { knowledgeId }); + await _runCypher(DELETE_REPOS_BY_KNOWLEDGE, { knowledgeId }); + await _runCypher(DELETE_FOLDERS_BY_KNOWLEDGE, { knowledgeId }); await _runCypher(DELETE_ORPHAN_FILES); await _runCypher(DELETE_KNOWLEDGE_NODE, { knowledgeId }); await _runCypher(DELETE_ORPHAN_ENTITIES); diff --git a/packages/neo4j/src/provider.ts b/packages/neo4j/src/provider.ts new file mode 100644 index 0000000..113f438 --- /dev/null +++ b/packages/neo4j/src/provider.ts @@ -0,0 +1,63 @@ +import { connectNeo4j, closeNeo4j, pingNeo4j, _runCypher, toNeo4jInt } from "./client.ts"; +import * as knowledgeRepo from "./knowledge.ts"; +import * as filesRepo from "./files.ts"; +import * as fileVersionsRepo from "./fileVersions.ts"; +import * as folderRepo from "./folder.ts"; +import * as repoRepo from "./repo.ts"; +import * as indexRepo from "./indexes.ts"; +import * as flatFolderIndexRepo from "./flatFolderIndexes.ts"; + +import { registerGraphProvider } from "@bb/graph-db"; +import type { IGraphDatabaseProvider } from "@bb/graph-core"; + +class Neo4jGraphProvider implements IGraphDatabaseProvider { + knowledge = { + upsertKnowledgeNode: knowledgeRepo.upsertKnowledgeNode, + setKnowledgeStateInGraph: knowledgeRepo.setKnowledgeStateInGraph, + setKnowledgeBranchInGraph: knowledgeRepo.setKnowledgeBranchInGraph, + deleteKnowledgeGraph: knowledgeRepo.deleteKnowledgeGraph, + }; + + files = { + upsertFileNode: filesRepo.upsertFileNode, + deleteFileNodes: filesRepo.deleteFileNodes, + snapshotFilesToVersion: fileVersionsRepo.snapshotFilesToVersion, + upsertFileNodesBatch: filesRepo.upsertFileNodesBatch, + }; + + folders = { + upsertFolderNode: folderRepo.upsertFolderNode, + upsertFolderNodesBatch: folderRepo.upsertFolderNodesBatch, + }; + + repo = { + upsertRepoNode: repoRepo.upsertRepoNode, + }; + + indexes = { + ensureKnowledgeIndexes: indexRepo.ensureKnowledgeIndexes, + ensureFlatFolderIndexes: flatFolderIndexRepo.ensureFlatFolderIndexes, + }; + + async connect(): Promise { + await connectNeo4j(); + } + + async close(): Promise { + await closeNeo4j(); + } + + async ping() { + return pingNeo4j(); + } + + async runCypher(query: string, params?: Record): Promise { + return _runCypher(query, params); + } + + toNeo4jInt(value: number): unknown { + return toNeo4jInt(value); + } +} + +registerGraphProvider("neo4j", () => new Neo4jGraphProvider()); diff --git a/packages/queue/package.json b/packages/queue/package.json index 99230be..c6b2502 100644 --- a/packages/queue/package.json +++ b/packages/queue/package.json @@ -13,6 +13,7 @@ }, "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", "@bb/mongo": "workspace:*", "@bb/redis": "workspace:*", diff --git a/packages/queue/src/github-index.ts b/packages/queue/src/github-index.ts index 39439c8..81631ac 100644 --- a/packages/queue/src/github-index.ts +++ b/packages/queue/src/github-index.ts @@ -1,5 +1,5 @@ import { JobPriority, JobType, KnowledgeState, type GithubIndexPayload } from "@bb/types"; -import { setKnowledgeState } from "@bb/mongo"; +import { knowledgeDb } from "@bb/db"; import { _getQueue } from "./manager.ts"; import { buildJobMessage, dedupeKey, mapPriority } from "./envelope.ts"; @@ -12,7 +12,7 @@ export async function enqueueGithubIndex(payload: GithubIndexPayload, opts: Enqu const message = buildJobMessage(JobType.GithubIndex, priority, payload); const jobId = dedupeKey(JobType.GithubIndex, payload.knowledgeId); - await setKnowledgeState(payload.knowledgeId, KnowledgeState.Queued); + await knowledgeDb.setKnowledgeState(payload.knowledgeId, KnowledgeState.Queued); const queue = _getQueue(JobType.GithubIndex); await queue.add(JobType.GithubIndex, message, { diff --git a/packages/queue/src/github-pull.ts b/packages/queue/src/github-pull.ts index e3d0c03..05de7fe 100644 --- a/packages/queue/src/github-pull.ts +++ b/packages/queue/src/github-pull.ts @@ -1,5 +1,5 @@ import { JobPriority, JobType, KnowledgeState, type GithubPullPayload } from "@bb/types"; -import { setKnowledgeState } from "@bb/mongo"; +import { knowledgeDb } from "@bb/db"; import { _getQueue } from "./manager.ts"; import { buildJobMessage, dedupeKey, mapPriority } from "./envelope.ts"; import type { EnqueueOptions } from "./github-index.ts"; @@ -9,7 +9,7 @@ export async function enqueueGithubPull(payload: GithubPullPayload, opts: Enqueu const message = buildJobMessage(JobType.GithubPull, priority, payload); const jobId = dedupeKey(JobType.GithubPull, payload.knowledgeId); - await setKnowledgeState(payload.knowledgeId, KnowledgeState.Queued); + await knowledgeDb.setKnowledgeState(payload.knowledgeId, KnowledgeState.Queued); const queue = _getQueue(JobType.GithubPull); await queue.add(JobType.GithubPull, message, { diff --git a/packages/server/package.json b/packages/server/package.json index aa828af..08b77e0 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -16,10 +16,13 @@ }, "dependencies": { "@bb/config": "workspace:*", + "@bb/db": "workspace:*", "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/ingest-github": "workspace:*", "@bb/mcp": "workspace:*", "@bb/mongo": "workspace:*", + "@bb/sqlite": "workspace:*", "@bb/neo4j": "workspace:*", "@bb/queue": "workspace:*", "@bb/redis": "workspace:*", diff --git a/packages/server/src/deleteRoute.ts b/packages/server/src/deleteRoute.ts index a35a63f..d32c66c 100644 --- a/packages/server/src/deleteRoute.ts +++ b/packages/server/src/deleteRoute.ts @@ -1,7 +1,7 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { deleteKnowledge } from "@bb/mongo"; -import { deleteKnowledgeGraph } from "@bb/neo4j"; +import { knowledgeDb } from "@bb/db"; +import { knowledgeGraph } from "@bb/graph-db"; import { removeKnowledgeJobs } from "@bb/queue"; import { KnowledgeNotFoundError } from "@bb/errors"; @@ -17,21 +17,21 @@ export function buildDeleteRoute(): Router { const removedJobs = await removeKnowledgeJobs(knowledgeId).catch(() => ({ removed: 0 })); try { - await deleteKnowledgeGraph(knowledgeId); + await knowledgeGraph.deleteKnowledgeGraph(knowledgeId); } catch (cause: unknown) { res.status(500).json({ error: `neo4j delete failed: ${describe(cause)}`, step: "neo4j" }); return; } - let mongoResult: Awaited>; + let mongoResult: Awaited>; try { - mongoResult = await deleteKnowledge(knowledgeId); + mongoResult = await knowledgeDb.deleteKnowledge(knowledgeId); } catch (cause: unknown) { if (cause instanceof KnowledgeNotFoundError) { res.status(404).json({ error: cause.message }); return; } - res.status(500).json({ error: `mongo delete failed: ${describe(cause)}`, step: "mongo" }); + res.status(500).json({ error: `database delete failed: ${describe(cause)}`, step: "database" }); return; } diff --git a/packages/server/src/githubCommitsRoute.ts b/packages/server/src/githubCommitsRoute.ts index 2bc909b..7a097cc 100644 --- a/packages/server/src/githubCommitsRoute.ts +++ b/packages/server/src/githubCommitsRoute.ts @@ -1,6 +1,6 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { getKnowledge } from "@bb/mongo"; +import { knowledgeDb } from "@bb/db"; import { fetchRecentCommits } from "@bb/ingest-github"; const DEFAULT_LIMIT = 200; @@ -40,7 +40,7 @@ export function buildGithubCommitsRoute(): Router { const limitRaw = req.query["limit"]; const limit = parseLimit(typeof limitRaw === "string" ? limitRaw : undefined); - const knowledge = await getKnowledge(knowledgeId); + const knowledge = await knowledgeDb.getKnowledge(knowledgeId); if (knowledge === null) { res.status(404).json({ error: "knowledge not found" }); return; diff --git a/packages/server/src/githubIndexRoute.ts b/packages/server/src/githubIndexRoute.ts index e92dde0..bf301c7 100644 --- a/packages/server/src/githubIndexRoute.ts +++ b/packages/server/src/githubIndexRoute.ts @@ -1,8 +1,8 @@ import type { Request, Response, Router } from "express"; import express from "express"; import { KnowledgeState, type KnowledgeDoc } from "@bb/types"; -import { upsertKnowledge } from "@bb/mongo"; -import { upsertKnowledgeNode } from "@bb/neo4j"; +import { knowledgeDb } from "@bb/db"; +import { knowledgeGraph } from "@bb/graph-db"; import { enqueueGithubIndex } from "@bb/queue"; interface IndexBody { @@ -37,8 +37,8 @@ export function buildGithubIndexRoute(): Router { createdAt: now, updatedAt: now, }; - await upsertKnowledge(doc); - await upsertKnowledgeNode(doc); + await knowledgeDb.upsertKnowledge(doc); + await knowledgeGraph.upsertKnowledgeNode(doc); const jobId = await enqueueGithubIndex({ knowledgeId, repoUrl, diff --git a/packages/server/src/githubPullRoute.ts b/packages/server/src/githubPullRoute.ts index 0b72a81..c92ddc3 100644 --- a/packages/server/src/githubPullRoute.ts +++ b/packages/server/src/githubPullRoute.ts @@ -1,6 +1,6 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { getKnowledge } from "@bb/mongo"; +import { knowledgeDb } from "@bb/db"; import { enqueueGithubPull } from "@bb/queue"; import { fetchLatestCommitHash } from "@bb/ingest-github"; @@ -54,7 +54,7 @@ export function buildGithubPullRoute(): Router { return; } - const knowledge = await getKnowledge(knowledgeId); + const knowledge = await knowledgeDb.getKnowledge(knowledgeId); if (knowledge === null) { res.status(404).json({ error: "knowledge not found" }); return; diff --git a/packages/server/src/healthRoute.ts b/packages/server/src/healthRoute.ts index 5b6f042..bb9431e 100644 --- a/packages/server/src/healthRoute.ts +++ b/packages/server/src/healthRoute.ts @@ -1,13 +1,13 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { pingMongo } from "@bb/mongo"; +import { pingDb } from "@bb/db"; import { pingRedis } from "@bb/redis"; -import { pingNeo4j } from "@bb/neo4j"; +import { pingGraph } from "@bb/graph-db"; export function buildHealthRoute(): Router { const router = express.Router(); router.get("/health", async (_req: Request, res: Response) => { - const [mongo, redis, neo4j] = await Promise.all([pingMongo(), pingRedis(), pingNeo4j()]); + const [mongo, redis, neo4j] = await Promise.all([pingDb(), pingRedis(), pingGraph()]); const ok = mongo.ok && redis.ok && neo4j.ok; res.status(ok ? 200 : 503).json({ status: ok ? "ok" : "down", mongo, redis, neo4j }); }); diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index df7b9a2..9bdcc54 100755 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -2,12 +2,15 @@ import { writeFile } from "node:fs/promises"; import path from "node:path"; import express from "express"; -import { Config, type Config as ConfigEnum } from "@bb/types"; +import { Config, DbProviderType, type Config as ConfigEnum } from "@bb/types"; import { getBytebellHome, getConfigValue, HINTS } from "@bb/config"; -import { connectMongo } from "@bb/mongo"; +import { connectDb } from "@bb/db"; import { connectRedis } from "@bb/redis"; -import { connectNeo4j, ensureKnowledgeIndexes } from "@bb/neo4j"; +import { connectGraph, indexesGraph } from "@bb/graph-db"; import { connectQueue } from "@bb/queue"; +import "@bb/mongo"; +import "@bb/sqlite"; +import "@bb/neo4j"; import { registerGithubWorkers, registerLocalIngestWorker } from "@bb/ingest-github"; import { ServerConfigError } from "@bb/errors"; import { registerRoutes } from "./routes.ts"; @@ -25,7 +28,17 @@ const REQUIRED: ConfigEnum[] = [ function checkRequiredConfig(): void { const missing: string[] = []; const hints: string[] = []; - for (const key of REQUIRED) { + const dbProvider = getConfigValue(Config.DbProvider); + + const required = [...REQUIRED]; + if (dbProvider !== DbProviderType.Mongo) { + const idx = required.indexOf(Config.MongoUri); + if (idx !== -1) { + required.splice(idx, 1); + } + } + + for (const key of required) { const value = getConfigValue(key); if (typeof value === "string" && value.length === 0) { missing.push(key); @@ -39,10 +52,14 @@ function checkRequiredConfig(): void { async function main(): Promise { checkRequiredConfig(); - await connectMongo(); + const dbProvider = getConfigValue(Config.DbProvider); + await connectDb(dbProvider); + await connectRedis(); - await connectNeo4j(); - await ensureKnowledgeIndexes(); + + const graphProvider = getConfigValue(Config.GraphProvider); + await connectGraph(graphProvider); + await indexesGraph.ensureKnowledgeIndexes(); await connectQueue(); registerGithubWorkers(); registerLocalIngestWorker(); diff --git a/packages/server/src/localIndexRoute.ts b/packages/server/src/localIndexRoute.ts index 326185e..1a7da95 100644 --- a/packages/server/src/localIndexRoute.ts +++ b/packages/server/src/localIndexRoute.ts @@ -4,8 +4,8 @@ import { stat, mkdir } from "node:fs/promises"; import path from "node:path"; import { KnowledgeState, type KnowledgeDoc } from "@bb/types"; import { getBytebellHome } from "@bb/config"; -import { upsertKnowledge } from "@bb/mongo"; -import { upsertKnowledgeNode } from "@bb/neo4j"; +import { knowledgeDb } from "@bb/db"; +import { knowledgeGraph } from "@bb/graph-db"; import { enqueueLocalIngest } from "@bb/queue"; import { copyRepo } from "./copyRepo.ts"; @@ -53,8 +53,8 @@ export function buildLocalIndexRoute(): Router { createdAt: now, updatedAt: now, }; - await upsertKnowledge(doc); - await upsertKnowledgeNode(doc); + await knowledgeDb.upsertKnowledge(doc); + await knowledgeGraph.upsertKnowledgeNode(doc); const jobId = await enqueueLocalIngest({ knowledgeId, rootDir: destDir }); res.status(200).json({ knowledgeId, jobId }); }); diff --git a/packages/server/src/mcpStatsRoute.ts b/packages/server/src/mcpStatsRoute.ts index 7bbf1c1..b85ad69 100644 --- a/packages/server/src/mcpStatsRoute.ts +++ b/packages/server/src/mcpStatsRoute.ts @@ -1,6 +1,6 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { getGlobalUsage, getMonthlyUsage } from "@bb/mongo"; +import { usageDb } from "@bb/db"; export function buildMcpStatsRoute(): Router { const router = express.Router(); @@ -11,7 +11,10 @@ export function buildMcpStatsRoute(): Router { const year = now.getUTCFullYear(); const month = now.getUTCMonth() + 1; - const [globalStats, monthlyStats] = await Promise.all([getGlobalUsage(), getMonthlyUsage(year, month)]); + const [globalStats, monthlyStats] = await Promise.all([ + usageDb.getGlobalUsage(), + usageDb.getMonthlyUsage(year, month), + ]); res.status(200).json({ global: globalStats[0] || { diff --git a/packages/server/src/reposRoute.ts b/packages/server/src/reposRoute.ts index 3343e42..b2e29f9 100644 --- a/packages/server/src/reposRoute.ts +++ b/packages/server/src/reposRoute.ts @@ -1,18 +1,25 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { getKnowledge, listKnowledge } from "@bb/mongo"; +import { knowledgeDb } from "@bb/db"; export function buildReposRoute(): Router { const router = express.Router(); router.get("/api/v1/repos", async (_req: Request, res: Response) => { - const entries = await listKnowledge(); + const entries = await knowledgeDb.listKnowledge(); const repos = entries.map((e) => ({ knowledgeId: e.knowledgeId, - source: e.source, + source: + e.source.kind === "github" + ? { + ...e.source, + repoUrl: e.info?.repoUrl, + branch: e.info?.branch, + } + : e.source, state: e.status.state, createdAt: e.createdAt instanceof Date ? e.createdAt.toISOString() : new Date(e.createdAt).toISOString(), updatedAt: e.updatedAt instanceof Date ? e.updatedAt.toISOString() : new Date(e.updatedAt).toISOString(), - fileCount: e.fileCount, + fileCount: e.status.totalFiles ?? e.fileCount, })); res.status(200).json({ repos }); }); @@ -23,20 +30,27 @@ export function buildReposRoute(): Router { res.status(400).json({ error: "invalid id" }); return; } - const entry = await getKnowledge(id); + const entry = await knowledgeDb.getKnowledge(id); if (entry === null) { res.status(404).json({ error: "knowledge not found" }); return; } res.status(200).json({ knowledgeId: entry.knowledgeId, - source: entry.source, + source: + entry.source.kind === "github" + ? { + ...entry.source, + repoUrl: entry.info?.repoUrl, + branch: entry.info?.branch, + } + : entry.source, state: entry.status.state, createdAt: entry.createdAt instanceof Date ? entry.createdAt.toISOString() : new Date(entry.createdAt).toISOString(), updatedAt: entry.updatedAt instanceof Date ? entry.updatedAt.toISOString() : new Date(entry.updatedAt).toISOString(), - fileCount: entry.fileCount, + fileCount: entry.status.totalFiles ?? entry.fileCount, totalFiles: entry.status.totalFiles, processedFiles: entry.status.processedFiles, }); diff --git a/packages/server/src/shutdown.ts b/packages/server/src/shutdown.ts index f1ff9e0..179a1d1 100644 --- a/packages/server/src/shutdown.ts +++ b/packages/server/src/shutdown.ts @@ -1,8 +1,8 @@ import { unlink } from "node:fs/promises"; import path from "node:path"; -import { closeMongo } from "@bb/mongo"; +import { closeDb } from "@bb/db"; import { closeRedis } from "@bb/redis"; -import { closeNeo4j } from "@bb/neo4j"; +import { closeGraph } from "@bb/graph-db"; import { closeQueue } from "@bb/queue"; import { closeAllMcpSessions } from "@bb/mcp"; import { getBytebellHome } from "@bb/config"; @@ -29,8 +29,8 @@ async function shutdown(signal: string): Promise { await closeAllMcpSessions(); await closeQueue(); await closeRedis(); - await closeNeo4j(); - await closeMongo(); + await closeGraph(); + await closeDb(); await unlink(path.join(getBytebellHome(), "pid")).catch(() => undefined); } catch (cause: unknown) { process.stderr.write(`Shutdown error: ${cause instanceof Error ? cause.message : String(cause)}\n`); diff --git a/packages/server/src/statsRoute.ts b/packages/server/src/statsRoute.ts index 2525b9e..54c63fd 100644 --- a/packages/server/src/statsRoute.ts +++ b/packages/server/src/statsRoute.ts @@ -1,11 +1,11 @@ import type { Request, Response, Router } from "express"; import express from "express"; -import { aggregateStats } from "@bb/mongo"; +import { statsDb } from "@bb/db"; export function buildStatsRoute(): Router { const router = express.Router(); router.get("/api/v1/stats", async (_req: Request, res: Response) => { - const stats = await aggregateStats(); + const stats = await statsDb.aggregateStats(); res.status(200).json(stats); }); return router; diff --git a/packages/sqlite/README.md b/packages/sqlite/README.md new file mode 100644 index 0000000..8dfa4b6 --- /dev/null +++ b/packages/sqlite/README.md @@ -0,0 +1,31 @@ +# `@bb/sqlite` + +SQLite implementation of the `IDocumentDatabaseProvider` interface. + +## Responsibilities + +Stores knowledge entries, raw file documents, activity logs, and usage records in a local SQLite database (via `bun:sqlite`). Registers itself as the `"sqlite"` provider with `@bb/db` at import time. + +## Public Interfaces + +- `connectSqlite()`, `closeSqlite()`, `pingSqlite()` — lifecycle and health probe +- Knowledge CRUD: `setKnowledgeState`, `setKnowledgeCommit`, `setKnowledgeBranch`, `updateKnowledgeProgress`, `upsertKnowledge`, `deleteKnowledge`, `listKnowledge`, `getKnowledge`, `markKnowledgeFailed` +- Raw files: `upsertRawFile`, `listRawFileShas`, `deleteRawFiles` +- Stats: `aggregateStats` +- Activity: `recordActivity` +- Usage: `incrementUsage`, `getMonthlyUsage`, `getGlobalUsage` + +## Data Ownership + +Owns a single SQLite file at the path configured by `Config.SqlitePath` (defaults to `~/.bytebell/data.sqlite`). Tables: `knowledge`, `raw_files`, `activity`, `usage`. + +## Invariants + +- Knowledge entries stored as JSON blobs keyed by `knowledgeId` +- Raw files keyed by `knowledgeId:relativePath` with a `knowledgeId` index +- WAL journal mode for concurrent read performance +- Foreign keys enforced + +## Tier + +Infrastructure (implements `@bb/db-core` interfaces, consumed via `@bb/db`) diff --git a/packages/sqlite/package.json b/packages/sqlite/package.json new file mode 100644 index 0000000..0bb6d3d --- /dev/null +++ b/packages/sqlite/package.json @@ -0,0 +1,21 @@ +{ + "name": "@bb/sqlite", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "#src/*": "./src/*" + }, + "dependencies": { + "@bb/config": "workspace:*", + "@bb/db": "workspace:*", + "@bb/errors": "workspace:*", + "@bb/db-core": "workspace:*", + "@bb/types": "workspace:*" + } +} diff --git a/packages/sqlite/src/README.md b/packages/sqlite/src/README.md new file mode 100644 index 0000000..c656d31 --- /dev/null +++ b/packages/sqlite/src/README.md @@ -0,0 +1,13 @@ +# `@bb/sqlite/src` + +SQLite implementation of the document database provider. + +## Files + +- **client.ts** — module-scoped `bun:sqlite` `Database` singleton, lifecycle (`connectSqlite`, `closeSqlite`), health probe (`pingSqlite`), schema initialization (tables: `knowledge`, `raw_files`, `activity`, `usage`) +- **knowledge.ts** — knowledge CRUD via JSON blobs: state transitions, commit tracking, progress updates, list/get/delete +- **raw.ts** — raw file upsert (keyed by `knowledgeId:relativePath`), SHA listing, batch delete +- **provider.ts** — `SqliteDatabaseProvider` class wiring all repositories; calls `registerDbProvider("sqlite", ...)` at import time +- **activity.ts** — activity record persistence +- **usage.ts** — token usage increment and aggregation +- **aggregateStats.ts** — global stats aggregation across tables diff --git a/packages/sqlite/src/activity.ts b/packages/sqlite/src/activity.ts new file mode 100644 index 0000000..aa15c87 --- /dev/null +++ b/packages/sqlite/src/activity.ts @@ -0,0 +1,18 @@ +import type { ActivityInput } from "@bb/types"; +import { getSqliteDb } from "./client.ts"; + +export async function recordActivity(input: ActivityInput): Promise { + const { response, ...rest } = input; + const db = getSqliteDb(); + const doc = { + identityId: rest.identityId, + toolName: rest.toolName, + query: rest.query, + responseSnippet: response.slice(0, 500), + durationMs: rest.durationMs, + tokens_input: rest.tokens.input, + tokens_output: rest.tokens.output, + createdAt: new Date().toISOString(), + }; + db.run("INSERT INTO activity (value) VALUES (?)", [JSON.stringify(doc)]); +} diff --git a/packages/sqlite/src/aggregateStats.ts b/packages/sqlite/src/aggregateStats.ts new file mode 100644 index 0000000..136311e --- /dev/null +++ b/packages/sqlite/src/aggregateStats.ts @@ -0,0 +1,146 @@ +import type { StatsCommitEntry, StatsRepoEntry, StatsResponse, StatsTotals } from "@bb/types"; +import type { KnowledgeDoc } from "@bb/types"; +import { getSqliteDb } from "./client.ts"; + +interface CommitHashRecord { + hash: string; + inputTokens: string; + outputTokens: string; + costUsd: string; +} + +export async function aggregateStats(): Promise { + const db = getSqliteDb(); + const rows = db.query("SELECT value FROM knowledge ORDER BY json_extract(value, '$.updatedAt') DESC").all() as { + value: string; + }[]; + + const repos: StatsRepoEntry[] = []; + const commitStats: StatsCommitEntry[] = []; + let totalInputTokens = 0; + let totalOutputTokens = 0; + let totalCost = 0; + let totalFiles = 0; + + for (const row of rows) { + const doc = JSON.parse(row.value) as KnowledgeDoc; + const knowledgeId = doc.knowledgeId; + const source = doc.source; + const info = doc.info; + + const fileCountRow = db.query("SELECT COUNT(*) as count FROM raw_files WHERE knowledgeId = ?").get(knowledgeId) as { + count: number; + }; + const fileCount = fileCountRow.count; + + const commits = pickCommits(source); + const repoName = deriveRepoName(source, info); + const type = source.kind === "github" ? ("GITHUB" as const) : ("LOCAL" as const); + + let repoIn = 0; + let repoOut = 0; + let repoCost = 0; + for (const c of commits) { + const inT = parseNumber(c.inputTokens); + const outT = parseNumber(c.outputTokens); + const cost = parseNumber(c.costUsd); + repoIn += inT; + repoOut += outT; + repoCost += cost; + commitStats.push({ + knowledgeId, + repoName, + commitHash: c.hash, + inputTokens: inT, + outputTokens: outT, + estimatedCost: cost, + totalBatches: 0, + processingTimeMs: 0, + totalFiles: fileCount, + totalFolders: 0, + filesAnalyzed: fileCount, + createdAt: "", + updatedAt: "", + }); + } + + repos.push({ + knowledgeId, + repoName, + type, + fileCount, + folderCount: 0, + inputTokens: repoIn, + outputTokens: repoOut, + estimatedCost: repoCost, + }); + + totalInputTokens += repoIn; + totalOutputTokens += repoOut; + totalCost += repoCost; + totalFiles += fileCount; + } + + const totals: StatsTotals = { + totalRepos: rows.length, + totalFiles, + totalFolders: 0, + totalInputTokens, + totalOutputTokens, + totalEstimatedCost: Math.round(totalCost * 1_000_000) / 1_000_000, + }; + + return { totals, repos, commitStats }; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function pickCommits(source: any): CommitHashRecord[] { + const raw = source?.commitHashes; + if (!Array.isArray(raw)) { + return []; + } + const out: CommitHashRecord[] = []; + for (const entry of raw) { + if (typeof entry !== "object" || entry === null) { + continue; + } + const rec = entry as Partial; + if (typeof rec.hash !== "string") { + continue; + } + out.push({ + hash: rec.hash, + inputTokens: typeof rec.inputTokens === "string" ? rec.inputTokens : "0", + outputTokens: typeof rec.outputTokens === "string" ? rec.outputTokens : "0", + costUsd: typeof rec.costUsd === "string" ? rec.costUsd : "0", + }); + } + return out; +} + +function parseNumber(value: string): number { + const n = Number.parseFloat(value); + return Number.isFinite(n) ? n : 0; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function deriveRepoName(source: any, info: any): string { + if (source.kind === "local") { + const segments = source.sourcePath.split("/").filter((s: string) => s.length > 0); + return segments.at(-1) ?? source.sourcePath; + } + try { + const segments = new URL(info.repoUrl ?? "").pathname + .split("/") + .map((s) => s.trim()) + .filter((s) => s.length > 0); + const repo = segments.at(-1)?.replace(/\.git$/u, ""); + const owner = segments.at(-2); + if (owner !== undefined && repo !== undefined) { + return `${owner}/${repo}`; + } + } catch { + // fall through + } + return info.repoUrl ?? ""; +} diff --git a/packages/sqlite/src/client.ts b/packages/sqlite/src/client.ts new file mode 100644 index 0000000..ac55874 --- /dev/null +++ b/packages/sqlite/src/client.ts @@ -0,0 +1,86 @@ +import { Database } from "bun:sqlite"; +import path from "node:path"; +import fs from "node:fs"; +import { getConfigValue, getBytebellHome } from "@bb/config"; +import { Config } from "@bb/types"; + +let db: Database | null = null; +let dbPath: string = ""; + +export async function connectSqlite(): Promise { + if (db !== null) { + return; + } + + let sqlitePath = getConfigValue(Config.SqlitePath); + if (!sqlitePath || sqlitePath.length === 0) { + sqlitePath = path.join(getBytebellHome(), "data.sqlite"); + } + + dbPath = sqlitePath; + const dir = path.dirname(dbPath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + + db = new Database(dbPath); + db.run("PRAGMA journal_mode = WAL;"); + db.run("PRAGMA foreign_keys = ON;"); + + // Initialize tables + db.run(` + CREATE TABLE IF NOT EXISTS knowledge ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `); + + db.run(` + CREATE TABLE IF NOT EXISTS raw_files ( + key TEXT PRIMARY KEY, + knowledgeId TEXT NOT NULL, + value TEXT NOT NULL + ); + `); + + db.run(`CREATE INDEX IF NOT EXISTS idx_raw_files_knowledgeId ON raw_files(knowledgeId);`); + + db.run(` + CREATE TABLE IF NOT EXISTS activity ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + value TEXT NOT NULL + ); + `); + + db.run(` + CREATE TABLE IF NOT EXISTS usage ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + `); +} + +export async function closeSqlite(): Promise { + if (db !== null) { + db.close(); + db = null; + } +} + +export function getSqliteDb(): Database { + if (db === null) { + throw new Error("SQLite Database not connected. Call connectSqlite() first."); + } + return db; +} + +export async function pingSqlite(): Promise<{ ok: boolean; latencyMs: number }> { + const start = performance.now(); + try { + const database = getSqliteDb(); + database.run("SELECT 1;"); + return { ok: true, latencyMs: Math.round(performance.now() - start) }; + } catch { + return { ok: false, latencyMs: Math.round(performance.now() - start) }; + } +} diff --git a/packages/sqlite/src/index.ts b/packages/sqlite/src/index.ts new file mode 100644 index 0000000..d655704 --- /dev/null +++ b/packages/sqlite/src/index.ts @@ -0,0 +1,23 @@ +import "./provider.ts"; + +export { connectSqlite, closeSqlite, pingSqlite } from "./client.ts"; + +export { + getKnowledge, + setKnowledgeCommit, + setKnowledgeState, + markKnowledgeFailed, + setKnowledgeBranch, + updateKnowledgeProgress, + upsertKnowledge, + listKnowledge, + deleteKnowledge, +} from "./knowledge.ts"; +export type { KnowledgeListEntry, DeleteKnowledgeResult } from "./knowledge.ts"; + +export { upsertRawFile, listRawFileShas, deleteRawFiles } from "./raw.ts"; + +export { aggregateStats } from "./aggregateStats.ts"; + +export { incrementUsage, getMonthlyUsage, getGlobalUsage } from "./usage.ts"; +export { recordActivity } from "./activity.ts"; diff --git a/packages/sqlite/src/knowledge.ts b/packages/sqlite/src/knowledge.ts new file mode 100644 index 0000000..2bbdfee --- /dev/null +++ b/packages/sqlite/src/knowledge.ts @@ -0,0 +1,181 @@ +import type { KnowledgeDoc, KnowledgeFailureCategory, KnowledgeState } from "@bb/types"; +import { KnowledgeNotFoundError } from "@bb/errors"; +import { getSqliteDb } from "./client.ts"; + +export interface KnowledgeListEntry extends KnowledgeDoc { + fileCount: number; +} + +export interface DeleteKnowledgeResult { + knowledgeDeleted: number; + rawDeleted: number; +} + +export async function setKnowledgeState(knowledgeId: string, state: KnowledgeState): Promise { + const db = getSqliteDb(); + const now = new Date().toISOString(); + const result = db.run( + "UPDATE knowledge SET value = json_remove(json_set(value, '$.status.state', ?, '$.updatedAt', ?), '$.failure') WHERE key = ?", + [state, now, knowledgeId], + ); + if (result.changes === 0) { + throw new KnowledgeNotFoundError(knowledgeId); + } +} + +export async function markKnowledgeFailed( + knowledgeId: string, + reason: string, + category: KnowledgeFailureCategory, + detail?: string, +): Promise { + const db = getSqliteDb(); + const now = new Date().toISOString(); + const failure = { + reason, + category, + at: now, + detail: detail || undefined, + }; + const result = db.run( + "UPDATE knowledge SET value = json_set(value, '$.status.state', 'FAILED', '$.updatedAt', ?, '$.failure', json(?)) WHERE key = ?", + [now, JSON.stringify(failure), knowledgeId], + ); + if (result.changes === 0) { + throw new KnowledgeNotFoundError(knowledgeId); + } +} + +export async function setKnowledgeCommit( + knowledgeId: string, + commitHash: string, + inputTokens: string = "", + outputTokens: string = "", + costUsd: string = "0", +): Promise { + const db = getSqliteDb(); + const row = db.query("SELECT value FROM knowledge WHERE key = ?").get(knowledgeId) as { + value: string; + } | null; + if (!row) { + throw new KnowledgeNotFoundError(knowledgeId); + } + const doc = JSON.parse(row.value) as KnowledgeDoc; + const source = doc.source as { commitId?: string; commitHashes?: unknown[] }; + source.commitId = commitHash; + if (!source.commitHashes) { + source.commitHashes = []; + } + const exists = source.commitHashes.some((c: unknown) => + typeof c === "string" ? c === commitHash : (c as { hash?: string }).hash === commitHash, + ); + if (!exists) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + source.commitHashes.push({ hash: commitHash, inputTokens, outputTokens, costUsd } as any); + } + doc.updatedAt = new Date(); + db.run("UPDATE knowledge SET value = ? WHERE key = ?", [JSON.stringify(doc), knowledgeId]); +} + +export async function setKnowledgeBranch(knowledgeId: string, branch: string): Promise { + const db = getSqliteDb(); + const now = new Date().toISOString(); + const result = db.run( + "UPDATE knowledge SET value = json_set(value, '$.source.branch', ?, '$.updatedAt', ?) WHERE key = ?", + [branch, now, knowledgeId], + ); + if (result.changes === 0) { + throw new KnowledgeNotFoundError(knowledgeId); + } +} + +export async function updateKnowledgeProgress( + knowledgeId: string, + processedFiles: number, + totalFiles?: number, +): Promise { + const db = getSqliteDb(); + const now = new Date().toISOString(); + let result; + if (totalFiles !== undefined) { + result = db.run( + "UPDATE knowledge SET value = json_set(value, '$.status.processedFiles', ?, '$.status.totalFiles', ?, '$.updatedAt', ?) WHERE key = ?", + [processedFiles, totalFiles, now, knowledgeId], + ); + } else { + result = db.run( + "UPDATE knowledge SET value = json_set(value, '$.status.processedFiles', ?, '$.updatedAt', ?) WHERE key = ?", + [processedFiles, now, knowledgeId], + ); + } + if (result.changes === 0) { + throw new KnowledgeNotFoundError(knowledgeId); + } +} + +export async function upsertKnowledge(doc: Omit & { updatedAt?: Date }): Promise { + const now = new Date(); + const db = getSqliteDb(); + const finalDoc: KnowledgeDoc = { + ...doc, + updatedAt: doc.updatedAt ?? now, + }; + db.run("INSERT INTO knowledge (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value", [ + doc.knowledgeId, + JSON.stringify(finalDoc), + ]); +} + +export async function deleteKnowledge(knowledgeId: string): Promise { + const db = getSqliteDb(); + const res1 = db.run("DELETE FROM knowledge WHERE key = ?", [knowledgeId]); + if (res1.changes === 0) { + throw new KnowledgeNotFoundError(knowledgeId); + } + const res2 = db.run("DELETE FROM raw_files WHERE knowledgeId = ?", [knowledgeId]); + return { + knowledgeDeleted: res1.changes, + rawDeleted: res2.changes, + }; +} + +export async function listKnowledge(opts: { limit?: number } = {}): Promise { + const db = getSqliteDb(); + const limit = opts.limit ?? 200; + const rows = db + .query("SELECT value FROM knowledge ORDER BY json_extract(value, '$.updatedAt') DESC LIMIT ?") + .all(limit) as { value: string }[]; + + const entries: KnowledgeListEntry[] = []; + for (const row of rows) { + const doc = JSON.parse(row.value) as KnowledgeDoc; + const fileCountRow = db + .query("SELECT COUNT(*) as count FROM raw_files WHERE knowledgeId = ?") + .get(doc.knowledgeId) as { count: number }; + entries.push({ + ...doc, + createdAt: new Date(doc.createdAt), + updatedAt: new Date(doc.updatedAt), + fileCount: fileCountRow.count, + }); + } + return entries; +} + +export async function getKnowledge(knowledgeId: string): Promise { + const db = getSqliteDb(); + const row = db.query("SELECT value FROM knowledge WHERE key = ?").get(knowledgeId) as { value: string } | null; + if (!row) { + return null; + } + const doc = JSON.parse(row.value) as KnowledgeDoc; + const fileCountRow = db.query("SELECT COUNT(*) as count FROM raw_files WHERE knowledgeId = ?").get(knowledgeId) as { + count: number; + }; + return { + ...doc, + createdAt: new Date(doc.createdAt), + updatedAt: new Date(doc.updatedAt), + fileCount: fileCountRow.count, + }; +} diff --git a/packages/sqlite/src/provider.ts b/packages/sqlite/src/provider.ts new file mode 100644 index 0000000..9c13f13 --- /dev/null +++ b/packages/sqlite/src/provider.ts @@ -0,0 +1,57 @@ +import { connectSqlite, closeSqlite, pingSqlite } from "./client.ts"; +import * as knowledgeRepo from "./knowledge.ts"; +import * as rawRepo from "./raw.ts"; +import * as statsRepo from "./aggregateStats.ts"; +import * as activityRepo from "./activity.ts"; +import * as usageRepo from "./usage.ts"; + +import { registerDbProvider } from "@bb/db"; +import type { IDocumentDatabaseProvider } from "@bb/db-core"; + +class SqliteDatabaseProvider implements IDocumentDatabaseProvider { + knowledge = { + setKnowledgeState: knowledgeRepo.setKnowledgeState, + setKnowledgeCommit: knowledgeRepo.setKnowledgeCommit, + setKnowledgeBranch: knowledgeRepo.setKnowledgeBranch, + updateKnowledgeProgress: knowledgeRepo.updateKnowledgeProgress, + upsertKnowledge: knowledgeRepo.upsertKnowledge, + deleteKnowledge: knowledgeRepo.deleteKnowledge, + listKnowledge: knowledgeRepo.listKnowledge, + getKnowledge: knowledgeRepo.getKnowledge, + markKnowledgeFailed: knowledgeRepo.markKnowledgeFailed, + }; + + raw = { + upsertRawFile: rawRepo.upsertRawFile, + listRawFileShas: rawRepo.listRawFileShas, + deleteRawFiles: rawRepo.deleteRawFiles, + }; + + stats = { + aggregateStats: statsRepo.aggregateStats, + }; + + activity = { + recordActivity: activityRepo.recordActivity, + }; + + usage = { + incrementUsage: usageRepo.incrementUsage, + getMonthlyUsage: usageRepo.getMonthlyUsage, + getGlobalUsage: usageRepo.getGlobalUsage, + }; + + async connect(): Promise { + await connectSqlite(); + } + + async close(): Promise { + await closeSqlite(); + } + + async ping() { + return pingSqlite(); + } +} + +registerDbProvider("sqlite", () => new SqliteDatabaseProvider()); diff --git a/packages/sqlite/src/raw.ts b/packages/sqlite/src/raw.ts new file mode 100644 index 0000000..2796f10 --- /dev/null +++ b/packages/sqlite/src/raw.ts @@ -0,0 +1,41 @@ +import { getSqliteDb } from "./client.ts"; +import type { RawFileDoc } from "@bb/db-core"; + +export async function upsertRawFile(doc: Omit): Promise { + const db = getSqliteDb(); + const key = `${doc.knowledgeId}:${doc.relativePath}`; + const finalDoc: RawFileDoc = { + ...doc, + updatedAt: new Date(), + }; + db.run( + "INSERT INTO raw_files (key, knowledgeId, value) VALUES (?, ?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value", + [key, doc.knowledgeId, JSON.stringify(finalDoc)], + ); +} + +export async function listRawFileShas(knowledgeId: string): Promise> { + const db = getSqliteDb(); + const rows = db + .query( + "SELECT json_extract(value, '$.relativePath') as relativePath, json_extract(value, '$.sha') as sha FROM raw_files WHERE knowledgeId = ?", + ) + .all(knowledgeId) as { relativePath: string; sha: string }[]; + + const map = new Map(); + for (const row of rows) { + map.set(row.relativePath, row.sha); + } + return map; +} + +export async function deleteRawFiles(knowledgeId: string, relativePaths: string[]): Promise { + if (relativePaths.length === 0) { + return 0; + } + const db = getSqliteDb(); + const keys = relativePaths.map((p) => `${knowledgeId}:${p}`); + const placeholders = keys.map(() => "?").join(","); + const result = db.run(`DELETE FROM raw_files WHERE key IN (${placeholders})`, keys); + return result.changes; +} diff --git a/packages/sqlite/src/usage.ts b/packages/sqlite/src/usage.ts new file mode 100644 index 0000000..6711af3 --- /dev/null +++ b/packages/sqlite/src/usage.ts @@ -0,0 +1,89 @@ +import { getSqliteDb } from "./client.ts"; + +export async function incrementUsage( + identityId: string, + inputTokenCount: number = 0, + outputTokenCount: number = 0, +): Promise { + const db = getSqliteDb(); + const now = new Date().toISOString(); + const year = new Date().getUTCFullYear(); + const month = new Date().getUTCMonth() + 1; + const key = `${identityId}:${year}:${month}`; + + const row = db.query("SELECT value FROM usage WHERE key = ?").get(key) as { value: string } | null; + if (row) { + const doc = JSON.parse(row.value); + doc.requestCount += 1; + doc.inputTokens += inputTokenCount; + doc.outputTokens += outputTokenCount; + doc.tokensUsed += inputTokenCount + outputTokenCount; + doc.lastUpdated = now; + db.run("UPDATE usage SET value = ? WHERE key = ?", [JSON.stringify(doc), key]); + } else { + const doc = { + identityId, + year, + month, + requestCount: 1, + inputTokens: inputTokenCount, + outputTokens: outputTokenCount, + tokensUsed: inputTokenCount + outputTokenCount, + lastUpdated: now, + createdAt: now, + }; + db.run("INSERT INTO usage (key, value) VALUES (?, ?)", [key, JSON.stringify(doc)]); + } +} + +export async function getMonthlyUsage(year: number, month: number): Promise { + const db = getSqliteDb(); + const rows = db + .query("SELECT value FROM usage WHERE json_extract(value, '$.year') = ? AND json_extract(value, '$.month') = ?") + .all(year, month) as { value: string }[]; + return rows.map((r) => { + const doc = JSON.parse(r.value); + return { + identityId: doc.identityId, + year: doc.year, + month: doc.month, + requestCount: doc.requestCount, + inputTokens: doc.inputTokens, + outputTokens: doc.outputTokens, + tokensUsed: doc.tokensUsed, + lastUpdated: new Date(doc.lastUpdated), + createdAt: new Date(doc.createdAt), + }; + }); +} + +export async function getGlobalUsage(): Promise { + const db = getSqliteDb(); + const row = db + .query( + `SELECT + SUM(json_extract(value, '$.requestCount')) as totalRequests, + SUM(json_extract(value, '$.inputTokens')) as totalInputTokens, + SUM(json_extract(value, '$.outputTokens')) as totalOutputTokens, + SUM(json_extract(value, '$.tokensUsed')) as totalTokens + FROM usage`, + ) + .get() as { + totalRequests: number | null; + totalInputTokens: number | null; + totalOutputTokens: number | null; + totalTokens: number | null; + } | null; + if (!row || row.totalRequests === null) { + return []; + } + return [ + { + _id: null, + totalRequests: row.totalRequests ?? 0, + totalInputTokens: row.totalInputTokens ?? 0, + totalOutputTokens: row.totalOutputTokens ?? 0, + totalTokens: row.totalTokens ?? 0, + }, + ]; +} diff --git a/packages/sqlite/tsconfig.json b/packages/sqlite/tsconfig.json new file mode 100644 index 0000000..4ed0786 --- /dev/null +++ b/packages/sqlite/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../../tsconfig.base.json", + "include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"] +} diff --git a/packages/types/src/analysis.ts b/packages/types/src/analysis.ts new file mode 100644 index 0000000..222adfb --- /dev/null +++ b/packages/types/src/analysis.ts @@ -0,0 +1,36 @@ +export interface FileAnalysisSection { + name: string; + description: string; +} + +export interface FileAnalysis { + purpose: string; + summary: string; + businessContext: string; + classes: string[]; + functions: string[]; + importsInternal: string[]; + importsExternal: string[]; + keywords: string[]; + ontologyConcepts?: string[]; + businessEntities?: string[]; + systemCapabilities?: string[]; + sideEffects?: string[]; + configDependencies?: string[]; + dataFlowDirection?: string; + integrationSurface?: string[]; + contractsProvided?: string[]; + contractsConsumed?: string[]; + sectionMap?: FileAnalysisSection[]; +} + +export interface RawFileDoc { + knowledgeId: string; + relativePath: string; + content: string; + sha: string; + sizeBytes: number; + language: string; + analysis: FileAnalysis; + updatedAt: Date; +} diff --git a/packages/types/src/config.ts b/packages/types/src/config.ts index c878718..0a0e780 100644 --- a/packages/types/src/config.ts +++ b/packages/types/src/config.ts @@ -35,4 +35,17 @@ export enum Config { SkipDecisionEnabled = "skip.decision.enabled", SkipDecisionMaxCharsForLlm = "skip.decision.max.chars.for.llm", SkipDecisionCachePath = "skip.decision.cache.path", + DbProvider = "db_provider", + GraphProvider = "graph_provider", + SqlitePath = "sqlite_path", +} + +export enum DbProviderType { + Sqlite = "sqlite", + Mongo = "mongo", +} + +export enum GraphProviderType { + Neo4j = "neo4j", + Ladybug = "ladybug", } diff --git a/packages/types/src/database.ts b/packages/types/src/database.ts new file mode 100644 index 0000000..4cd4985 --- /dev/null +++ b/packages/types/src/database.ts @@ -0,0 +1,10 @@ +export interface DeleteKnowledgeResult { + knowledgeDeleted: number; + rawDeleted: number; + statsDeleted?: number; +} + +export interface DbPingResult { + ok: boolean; + latencyMs: number; +} diff --git a/packages/types/src/graph.ts b/packages/types/src/graph.ts new file mode 100644 index 0000000..5d948b6 --- /dev/null +++ b/packages/types/src/graph.ts @@ -0,0 +1,66 @@ +import type { FileAnalysis } from "./analysis.ts"; + +export interface NodeScope { + orgId: string; + knowledgeId: string; + repoId: string; +} + +export interface RepoSummaryPayload { + purpose: string; + summary: string; + keywords: string[]; + architecture: string; + dataFlow: string; + majorSubsystems: string[]; + keyPatterns: string[]; +} + +export interface UpsertRepoNodeInput { + scope: NodeScope; + repoUrl: string; + branch: string; + summary: RepoSummaryPayload; +} + +export interface FolderSummaryPayload { + purpose: string; + summary: string; + keywords: string[]; + classes: string[]; + functions: string[]; + importsInternal: string[]; + importsExternal: string[]; + dependencyGraph: string; +} + +export interface UpsertFolderNodeInput { + scope: NodeScope; + folderPath: string; + summary: FolderSummaryPayload; +} + +export interface SnapshotFilesInput { + knowledgeId: string; + commitHash: string; +} + +export interface UpsertFileNodeInput { + orgId?: string; + knowledgeId: string; + repoId?: string; + relativePath: string; + language: string; + sha: string; + sizeBytes: number; + analysis: FileAnalysis; + folderPath?: string; + isBigFile?: boolean; + totalChunks?: number; + totalTokenCount?: number; +} + +export interface GraphPingResult { + ok: boolean; + latencyMs: number; +} diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index b5171f8..2caabc5 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -1,4 +1,4 @@ -export { Config } from "./config.ts"; +export { Config, DbProviderType, GraphProviderType } from "./config.ts"; export { JobType, JobPriority } from "./job.ts"; export type { GithubIndexPayload, @@ -18,6 +18,20 @@ export type { KnowledgeInfo, KnowledgeSource, LocalKnowledgeSource, + KnowledgeListEntry, + CommitHashRecord, } from "./knowledge.ts"; export type { StatsCommitEntry, StatsRepoEntry, StatsResponse, StatsTotals } from "./stats.ts"; export type { UsageDoc, ActivityDoc, UsageIncrement, ActivityInput } from "./usage.ts"; +export type { FileAnalysisSection, FileAnalysis, RawFileDoc } from "./analysis.ts"; +export type { DeleteKnowledgeResult, DbPingResult } from "./database.ts"; +export type { + NodeScope, + RepoSummaryPayload, + UpsertRepoNodeInput, + FolderSummaryPayload, + UpsertFolderNodeInput, + SnapshotFilesInput, + UpsertFileNodeInput, + GraphPingResult, +} from "./graph.ts"; diff --git a/packages/types/src/knowledge.ts b/packages/types/src/knowledge.ts index aa6f77b..378e1a5 100644 --- a/packages/types/src/knowledge.ts +++ b/packages/types/src/knowledge.ts @@ -81,3 +81,7 @@ export interface KnowledgeDoc { */ failure?: KnowledgeFailure; } + +export interface KnowledgeListEntry extends KnowledgeDoc { + fileCount: number; +}