Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 102 additions & 2 deletions bun.lock

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions infra/docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ services:
retries: 12
start_period: 5s

# Ladybug explorer container (presumably a browser UI for the .lbug database file — confirm).
ladybug-explorer:
  # NOTE(review): `latest` is a floating tag; pin a released version for reproducible environments.
  image: ghcr.io/ladybugdb/explorer:latest
  container_name: bytebell-ladybug-explorer
  restart: unless-stopped
  ports:
    # Published on the loopback interface only, so the port is not reachable from other hosts.
    - "127.0.0.1:8000:8000"
  volumes:
    # Resolve the data directory from the invoking user's home instead of a
    # hard-coded developer path, so the stack starts on any machine.
    - "${HOME}/.bytebell:/database"
  environment:
    - LBUG_FILE=ladybug.lbug
  networks:
    - bytebell

networks:
bytebell:
name: bytebell
Expand Down
4 changes: 4 additions & 0 deletions packages/config/src/schema-fields.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ export function readField<K extends Config>(cfg: BytebellConfig, key: K): Config
return cfg.graph_provider as ConfigValue<K>;
case Config.SqlitePath:
return cfg.sqlite_path as ConfigValue<K>;
case Config.LadybugPath:
return cfg.ladybug_path as ConfigValue<K>;
default:
throw new Error(`Unknown config key: ${key}`);
}
Expand Down Expand Up @@ -168,6 +170,8 @@ export function writeField<K extends Config>(cfg: BytebellConfig, key: K, value:
return { ...cfg, graph_provider: value as string };
case Config.SqlitePath:
return { ...cfg, sqlite_path: value as string };
case Config.LadybugPath:
return { ...cfg, ladybug_path: value as string };
default:
throw new Error(`Unknown config key: ${key}`);
}
Expand Down
3 changes: 3 additions & 0 deletions packages/config/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export const configSchema = z
db_provider: z.string().default("sqlite"),
graph_provider: z.string().default("neo4j"),
sqlite_path: z.string().default(""),
ladybug_path: z.string().default(""),
})
.strict();

Expand Down Expand Up @@ -103,6 +104,7 @@ export type ConfigValueMap = {
[Config.DbProvider]: string;
[Config.GraphProvider]: string;
[Config.SqlitePath]: string;
[Config.LadybugPath]: string;
};

export type ConfigValue<K extends Config> = ConfigValueMap[K];
Expand Down Expand Up @@ -164,6 +166,7 @@ export const HINTS: Readonly<Record<Config, string>> = {
[Config.DbProvider]: "bytebell set db-provider <mongo|...>",
[Config.GraphProvider]: "bytebell set graph-provider <neo4j|...>",
[Config.SqlitePath]: "bytebell set sqlite-path <path>",
[Config.LadybugPath]: "bytebell set ladybug-path <path>",
};

export { readField, writeField } from "./schema-fields.ts";
5 changes: 3 additions & 2 deletions packages/graph-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@ export interface IGraphFileRepository {
upsertFileNode(input: UpsertFileNodeInput): Promise<void>;
deleteFileNodes(knowledgeId: string, paths: string[]): Promise<void>;
snapshotFilesToVersion(input: SnapshotFilesInput): Promise<void>;
upsertFileNodesBatch(inputs: readonly UpsertFileNodeInput[]): Promise<void>;
upsertFileNodesBatch?(inputs: readonly UpsertFileNodeInput[]): Promise<void>;
bulkUpsertFiles?(knowledgeId: string, fileStream: AsyncIterable<UpsertFileNodeInput>): Promise<void>;
}

export interface IGraphFolderRepository {
upsertFolderNode(input: UpsertFolderNodeInput): Promise<void>;
upsertFolderNodesBatch(inputs: readonly UpsertFolderNodeInput[]): Promise<void>;
upsertFolderNodesBatch?(inputs: readonly UpsertFolderNodeInput[]): Promise<void>;
}

export interface IGraphRepoRepository {
Expand Down
31 changes: 29 additions & 2 deletions packages/graph-db/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,39 @@ export const filesGraph: IGraphFileRepository = {
upsertFileNode: (...args) => getGraph().files.upsertFileNode(...args),
deleteFileNodes: (...args) => getGraph().files.deleteFileNodes(...args),
snapshotFilesToVersion: (...args) => getGraph().files.snapshotFilesToVersion(...args),
upsertFileNodesBatch: (...args) => getGraph().files.upsertFileNodesBatch(...args),
upsertFileNodesBatch: async (inputs) => {
const f = getGraph().files;
if (f.upsertFileNodesBatch) {
await f.upsertFileNodesBatch(inputs);
} else {
for (const input of inputs) {
await f.upsertFileNode(input);
}
}
},
bulkUpsertFiles: async (knowledgeId, fileStream) => {
const f = getGraph().files;
if (f.bulkUpsertFiles) {
return f.bulkUpsertFiles(knowledgeId, fileStream);
}
for await (const input of fileStream) {
await f.upsertFileNode(input);
}
},
};

export const foldersGraph: IGraphFolderRepository = {
upsertFolderNode: (...args) => getGraph().folders.upsertFolderNode(...args),
upsertFolderNodesBatch: (...args) => getGraph().folders.upsertFolderNodesBatch(...args),
upsertFolderNodesBatch: async (inputs) => {
const f = getGraph().folders;
if (f.upsertFolderNodesBatch) {
await f.upsertFolderNodesBatch(inputs);
} else {
for (const input of inputs) {
await f.upsertFolderNode(input);
}
}
},
};

export const repoGraph: IGraphRepoRepository = {
Expand Down
1 change: 1 addition & 0 deletions packages/ingest-github/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"@bb/mongo": "workspace:*",
"@bb/sqlite": "workspace:*",
"@bb/neo4j": "workspace:*",
"@bb/ladybug": "workspace:*",
"@bb/queue": "workspace:*",
"@bb/db-core": "workspace:*",
"@bb/graph-core": "workspace:*",
Expand Down
1 change: 1 addition & 0 deletions packages/ingest-github/src/bootstrap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { connectGraph } from "@bb/graph-db";
import "@bb/mongo";
import "@bb/sqlite";
import "@bb/neo4j";
import "@bb/ladybug";

export interface BootstrapRuntimeOptions {
config: unknown;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
import { readFile, readdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { askJsonLLM, type AskLlmOptions } from "@bb/llm";
import { LlmConfigError, LlmError } from "@bb/errors";
import { logger } from "@bb/logger";
import { Config } from "@bb/types";
import { getConfigValue } from "@bb/config";
import type { CondensedFileAnalysis } from "#src/types/condensed-file-analysis.ts";
import type { MetaPaths } from "#src/types/meta-paths.ts";
import { encodeMetaPath } from "#src/pipeline/paths.ts";
import {
FOLDER_ANALYSIS_SYSTEM_PROMPT,
FOLDER_BATCH_SYSTEM_PROMPT,
folderAnalysisUserPrompt,
folderBatchUserPrompt,
type BatchedFolderInput,
} from "./prompts/folder-summary.ts";
import type { FolderSummary } from "./types.ts";

/**
 * A folder paired with the condensed analyses of the files grouped under it.
 * This is the unit handed to the individual and batched folder summarisers.
 */
export interface FolderBucket {
  /** Folder path; an empty string is logged as "<root>" by the summarisers. */
  folderPath: string;
  /** Condensed per-file analyses belonging to this folder. */
  files: CondensedFileAnalysis[];
}

/**
 * Untrusted shape of the JSON object the LLM returns for a folder.
 * Every field is optional and typed `unknown`; `shapeFolderSummary` coerces
 * each one into the strict `FolderSummary` shape.
 */
interface FolderSummaryJson {
  // Fields read as strings by shapeFolderSummary.
  purpose?: unknown;
  summary?: unknown;
  // Fields read as string arrays by shapeFolderSummary.
  keywords?: unknown;
  classes?: unknown;
  functions?: unknown;
  importsInternal?: unknown;
  importsExternal?: unknown;
  // Read as a string by shapeFolderSummary.
  dependencyGraph?: unknown;
}

/**
 * Partitions the folder groups into two work lists:
 *
 * - `individual`: folders summarised with one LLM call each. This covers every
 *   folder when `Config.FolderSummaryBatchSize` is 1 or less (batching
 *   disabled), and otherwise any folder holding more files than
 *   `Config.FolderSummaryBatchMaxFiles`.
 * - `batches`: the remaining folders, chunked into groups of at most
 *   `Config.FolderSummaryBatchSize` that share a single LLM call.
 *
 * Buckets are ordered by folder path before partitioning so repeated runs over
 * the same repo yield the same batch composition, which keeps A/B comparisons
 * of outputs meaningful.
 *
 * @param groups Map from folder path to the condensed analyses of its files.
 * @returns The individual buckets and the batched buckets.
 */
export function groupFoldersForBatching(groups: Map<string, CondensedFileAnalysis[]>): {
  individual: FolderBucket[];
  batches: FolderBucket[][];
} {
  const batchSize = getConfigValue(Config.FolderSummaryBatchSize);
  const maxFiles = getConfigValue(Config.FolderSummaryBatchMaxFiles);

  const ordered: FolderBucket[] = [];
  for (const [folderPath, files] of groups) {
    ordered.push({ folderPath, files });
  }
  ordered.sort((left, right) => left.folderPath.localeCompare(right.folderPath));

  // Batching disabled: every folder gets its own call.
  if (batchSize <= 1) {
    return { individual: ordered, batches: [] };
  }

  const exceedsLimit = (bucket: FolderBucket): boolean => bucket.files.length > maxFiles;
  const individual = ordered.filter((bucket) => exceedsLimit(bucket));
  const batchable = ordered.filter((bucket) => !exceedsLimit(bucket));

  // Chunk the small folders into consecutive windows of `batchSize`.
  const batches: FolderBucket[][] = [];
  let start = 0;
  while (start < batchable.length) {
    batches.push(batchable.slice(start, start + batchSize));
    start += batchSize;
  }
  return { individual, batches };
}

/**
 * Summarises a single folder with one LLM call.
 *
 * @param folderPath Folder being summarised; an empty string is logged as "<root>".
 * @param files Condensed analyses of the folder's files, fed into the prompt.
 * @param llmCallContext Optional options forwarded to `askJsonLLM` (defaults to `{}`).
 * @returns The shaped summary, or `null` when the response could not be parsed
 *   or a non-LLM error occurred, together with the token usage of the call
 *   (all zeros when the call itself failed).
 * @throws LlmConfigError | LlmError Re-thrown untouched so the caller can handle them.
 */
export async function summariseFolder(
  folderPath: string,
  files: CondensedFileAnalysis[],
  llmCallContext?: AskLlmOptions,
): Promise<{
  summary: FolderSummary | null;
  tokenUsage: { inputTokens: number; outputTokens: number; costUsd: number };
}> {
  const userPrompt = folderAnalysisUserPrompt(folderPath, files);
  try {
    const response = await askJsonLLM<FolderSummaryJson>(
      FOLDER_ANALYSIS_SYSTEM_PROMPT,
      userPrompt,
      llmCallContext ?? {},
    );

    const parsed = response.result;
    let summary: FolderSummary | null = null;
    if (parsed === null) {
      logger.warn(`summariseFolder: ${folderPath || "<root>"} returned unparseable JSON`);
    } else {
      summary = shapeFolderSummary(folderPath, parsed);
    }

    // Usage is reported even for unparseable responses: the tokens were still spent.
    const { inputTokens, outputTokens, costUsd } = response.usage;
    return { summary, tokenUsage: { inputTokens, outputTokens, costUsd } };
  } catch (cause: unknown) {
    const isLlmFailure = cause instanceof LlmConfigError || cause instanceof LlmError;
    if (isLlmFailure) {
      throw cause;
    }
    const msg = cause instanceof Error ? cause.message : String(cause);
    logger.warn(`summariseFolder: ${folderPath || "<root>"} askJsonLLM failed: ${msg}`);
    return { summary: null, tokenUsage: { inputTokens: 0, outputTokens: 0, costUsd: 0 } };
  }
}

/**
 * Summarises several folders with a single LLM call.
 *
 * Each folder is assigned a numeric label (its index in `batch`); the prompt
 * is built from the labelled inputs and the response is expected to be a JSON
 * object keyed by those labels. A folder whose entry is absent or is not an
 * object is recorded as `null` with a warn log, and the caller counts it as
 * failed.
 *
 * @param batch Folders to summarise together.
 * @param llmCallContext Optional options forwarded to `askJsonLLM` (defaults to `{}`).
 * @returns A map from folder path to its summary (or `null`), plus the token
 *   usage of the call (all zeros when the call itself failed).
 * @throws LlmConfigError | LlmError Re-thrown untouched so the caller can handle them.
 */
export async function summariseFolderBatch(
  batch: FolderBucket[],
  llmCallContext?: AskLlmOptions,
): Promise<{
  summaries: Map<string, FolderSummary | null>;
  tokenUsage: { inputTokens: number; outputTokens: number; costUsd: number };
}> {
  const labeled: BatchedFolderInput[] = batch.map((bucket, index) => ({
    label: index,
    folderPath: bucket.folderPath,
    files: bucket.files,
  }));
  const userPrompt = folderBatchUserPrompt(labeled);
  const summaries = new Map<string, FolderSummary | null>();

  // Records every folder of the batch as failed (overwrites any earlier entry).
  const markAllFailed = (): void => {
    for (const bucket of batch) {
      summaries.set(bucket.folderPath, null);
    }
  };

  try {
    const response = await askJsonLLM<Record<string, FolderSummaryJson>>(
      FOLDER_BATCH_SYSTEM_PROMPT,
      userPrompt,
      llmCallContext ?? {},
    );

    const keyed = response.result;
    if (keyed === null) {
      logger.warn(`summariseFolderBatch: batch of ${batch.length} returned unparseable JSON`);
      markAllFailed();
    } else {
      for (const entry of labeled) {
        const raw = keyed[String(entry.label)];
        if (typeof raw === "object" && raw !== null) {
          summaries.set(entry.folderPath, shapeFolderSummary(entry.folderPath, raw));
          continue;
        }
        logger.warn(
          `summariseFolderBatch: missing/invalid entry for label ${entry.label} (${entry.folderPath || "<root>"})`,
        );
        summaries.set(entry.folderPath, null);
      }
    }

    // Usage is reported even for unparseable responses: the tokens were still spent.
    const { inputTokens, outputTokens, costUsd } = response.usage;
    return { summaries, tokenUsage: { inputTokens, outputTokens, costUsd } };
  } catch (cause: unknown) {
    const isLlmFailure = cause instanceof LlmConfigError || cause instanceof LlmError;
    if (isLlmFailure) {
      throw cause;
    }
    const msg = cause instanceof Error ? cause.message : String(cause);
    logger.warn(`summariseFolderBatch: batch of ${batch.length} askJsonLLM failed: ${msg}`);
    markAllFailed();
    return { summaries, tokenUsage: { inputTokens: 0, outputTokens: 0, costUsd: 0 } };
  }
}

/**
 * Writes one folder summary to `metaPaths.folderSummariesDir` as pretty-printed
 * (2-space) UTF-8 JSON.
 *
 * The file name is the encoded folder path plus `.json`; an empty folder path
 * is stored under the `__ROOT__` key.
 *
 * @param metaPaths Supplies the folder-summaries directory.
 * @param summary Summary to persist.
 */
export async function persistFolderSummary(metaPaths: MetaPaths, summary: FolderSummary): Promise<void> {
  const key = summary.folderPath || "__ROOT__";
  const fileName = `${encodeMetaPath(key)}.json`;
  const target = path.join(metaPaths.folderSummariesDir, fileName);
  const payload = JSON.stringify(summary, null, 2);
  await writeFile(target, payload, "utf8");
}

/**
 * Lazily yields the folder summaries stored as `.json` files directly under
 * `metaPaths.folderSummariesDir`.
 *
 * This is a best-effort reader: if the directory cannot be listed the
 * generator finishes without yielding, and files that cannot be read or parsed
 * are skipped. Only JSON objects carrying a string `folderPath` are yielded,
 * so arrays and unrelated objects are never passed off as a `FolderSummary`.
 *
 * @param metaPaths Supplies the folder-summaries directory.
 * @returns An async generator of the stored summaries, in `readdir` order.
 */
export async function* iterateFolderSummaries(metaPaths: MetaPaths): AsyncGenerator<FolderSummary> {
  const dir = metaPaths.folderSummariesDir;

  let entries: string[];
  try {
    entries = await readdir(dir);
  } catch {
    // Nothing has been persisted (or the directory is unreadable): no summaries.
    return;
  }

  for (const name of entries) {
    if (!name.endsWith(".json")) {
      continue;
    }

    let parsed: unknown;
    try {
      const raw = await readFile(path.join(dir, name), "utf8");
      parsed = JSON.parse(raw);
    } catch {
      // Unreadable or corrupt file: skip it and keep iterating.
      continue;
    }

    // `typeof [] === "object"`, so arrays must be excluded explicitly.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      continue;
    }
    if (typeof (parsed as { folderPath?: unknown }).folderPath !== "string") {
      continue;
    }

    // Yield outside the try block so an error thrown into the generator by the
    // consumer propagates instead of being swallowed as a "bad file".
    yield parsed as FolderSummary;
  }
}

/**
 * Coerces an untrusted LLM JSON object into a strict `FolderSummary`.
 *
 * String fields that are missing, non-string, or empty become `""`; array
 * fields that are not arrays become `[]`, and non-string or empty items are
 * dropped. `generatedAt` is stamped with the current time in ISO-8601 form.
 *
 * @param folderPath Folder the summary belongs to; copied through unchanged.
 * @param raw Parsed JSON as returned by the LLM.
 * @returns The normalised summary.
 */
export function shapeFolderSummary(folderPath: string, raw: FolderSummaryJson): FolderSummary {
  const text = (value: unknown): string => pickString(value, "");
  const {
    purpose,
    summary,
    keywords,
    classes,
    functions,
    importsInternal,
    importsExternal,
    dependencyGraph,
  } = raw;

  return {
    folderPath,
    purpose: text(purpose),
    summary: text(summary),
    keywords: pickStringArray(keywords),
    classes: pickStringArray(classes),
    functions: pickStringArray(functions),
    importsInternal: pickStringArray(importsInternal),
    importsExternal: pickStringArray(importsExternal),
    dependencyGraph: text(dependencyGraph),
    generatedAt: new Date().toISOString(),
  };
}

/**
 * Returns `value` when it is a non-empty string, otherwise `fallback`.
 *
 * @param value Untrusted input of any type.
 * @param fallback Result used for non-strings and for the empty string.
 */
function pickString(value: unknown, fallback: string): string {
  if (typeof value !== "string" || value === "") {
    return fallback;
  }
  return value;
}

/**
 * Extracts the non-empty string items from `value`, preserving their order.
 *
 * @param value Untrusted input of any type.
 * @returns A new array of the non-empty strings, or `[]` when `value` is not an array.
 */
function pickStringArray(value: unknown): string[] {
  return Array.isArray(value)
    ? value.filter((entry): entry is string => typeof entry === "string" && entry !== "")
    : [];
}
Loading