Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions packages/cli/src/LsCommand.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import { Command } from "commander";
import { Config } from "@bb/types";
import { Config, normalizeCommitHashes, resolveIndexedCommit, type KnowledgeSource } from "@bb/types";
import { getConfigValue } from "@bb/config";
import { ensureServerRunning, ServerStartTimeoutError } from "./serverSpawn.ts";
import { getJson, HttpClientError } from "./httpClient.ts";
import { createSpinner, error } from "./output.ts";

interface RepoEntry {
knowledgeId: string;
source:
| { kind: "github"; repoUrl: string; branch?: string; commitId?: string; commitHashes?: string[] }
| { kind: "local"; sourcePath: string };
source: KnowledgeSource;
state: string;
createdAt: string;
updatedAt: string;
Expand Down Expand Up @@ -62,7 +60,7 @@ async function runLs(): Promise<void> {
}

function renderTable(repos: RepoEntry[]): void {
const headers = ["ID", "SOURCE", "STATE", "UPDATED", "HEAD", "COMMITS", "FILES"];
const headers = ["ID", "SOURCE", "STATE", "UPDATED", "COMMIT", "COMMITS", "FILES"];
const rows = repos.map((r) => [
`${r.knowledgeId.slice(0, 8)}…`,
formatSource(r.source),
Expand All @@ -86,17 +84,18 @@ function formatHead(source: RepoEntry["source"]): string {
if (source.kind !== "github") {
return "-";
}
if (source.commitId === undefined || source.commitId.length === 0) {
const commitId = resolveIndexedCommit(source);
if (commitId === undefined) {
return "-";
}
return source.commitId.slice(0, 8);
return commitId.slice(0, 8);
}

function formatCommits(source: RepoEntry["source"]): string {
if (source.kind !== "github") {
return "-";
}
return String(source.commitHashes?.length ?? 0);
return String(normalizeCommitHashes(source.commitHashes).length);
}

function formatSource(source: RepoEntry["source"]): string {
Expand Down
15 changes: 6 additions & 9 deletions packages/cli/src/repoSelectorPrompt.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import React from "react";
import { render } from "ink";
import { normalizeCommitHashes, resolveIndexedCommit, type KnowledgeSource } from "@bb/types";
import { getJson } from "./httpClient.ts";
import {
RepoSelector,
Expand All @@ -26,9 +27,7 @@ import {

export interface RepoListEntry {
knowledgeId: string;
source:
| { kind: "github"; repoUrl: string; branch?: string; commitId?: string; commitHashes?: string[] }
| { kind: "local"; sourcePath: string };
source: KnowledgeSource;
state: string;
createdAt: string;
updatedAt: string;
Expand Down Expand Up @@ -107,12 +106,10 @@ function formatDetail(repo: RepoListEntry): string {
if (repo.source.kind !== "github") {
return `${repo.state} ${idChunk} ${repo.fileCount} files`;
}
const head =
repo.source.commitId !== undefined && repo.source.commitId.length > 0
? `head=${repo.source.commitId.slice(0, 8)}`
: "head=-";
const commits = `${repo.source.commitHashes?.length ?? 0} commits`;
return `${repo.state} ${idChunk} ${head} ${commits} ${repo.fileCount} files`;
const commitId = resolveIndexedCommit(repo.source);
const commit = commitId !== undefined ? `commit=${commitId.slice(0, 8)}` : "commit=-";
const commits = `${normalizeCommitHashes(repo.source.commitHashes).length} commits`;
return `${repo.state} ${idChunk} ${commit} ${commits} ${repo.fileCount} files`;
}

function formatSourceLabel(source: RepoListEntry["source"]): string {
Expand Down
21 changes: 13 additions & 8 deletions packages/ingest-github/src/pipeline/pull.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import { Config, KnowledgeState, type GithubPullPayload, type JobMessage } from "@bb/types";
import {
Config,
KnowledgeState,
isFullCommitHash,
resolveIndexedCommit,
type GithubPullPayload,
type JobMessage,
} from "@bb/types";
import { getConfigValue } from "@bb/config";
import { getKnowledge, recordProcessingStats, setKnowledgeCommit, setKnowledgeState } from "@bb/mongo";
import { setKnowledgeStateInGraph, snapshotFilesToVersion, type NodeScope } from "@bb/neo4j";
Expand All @@ -25,8 +32,6 @@ import {
buildFileAnalysisUserPrompt,
} from "src/strategies/flat-folder/prompts/file-analysis.ts";

const COMMIT_HASH_RE = /^[0-9a-f]{40}$/u;

function resolveOrgId(payload: { orgId?: string }): string {
if (typeof payload.orgId === "string" && payload.orgId.length > 0) {
return payload.orgId;
Expand All @@ -36,7 +41,7 @@ function resolveOrgId(payload: { orgId?: string }): string {

export async function runPull(msg: JobMessage<GithubPullPayload>): Promise<void> {
const { knowledgeId } = msg.payload;
if (msg.payload.targetCommitHash !== undefined && !COMMIT_HASH_RE.test(msg.payload.targetCommitHash)) {
if (msg.payload.targetCommitHash !== undefined && !isFullCommitHash(msg.payload.targetCommitHash)) {
throw new IngestError(
knowledgeId,
`targetCommitHash must be a 40-character hex SHA, got: ${msg.payload.targetCommitHash}`,
Expand All @@ -50,8 +55,8 @@ export async function runPull(msg: JobMessage<GithubPullPayload>): Promise<void>
if (knowledge.source.kind !== "github") {
throw new IngestError(knowledgeId, `pull is only supported for github knowledge (kind=${knowledge.source.kind})`);
}
const currentCommit = knowledge.source.commitId ?? "";
if (currentCommit.length === 0) {
const currentCommit = resolveIndexedCommit(knowledge.source);
if (currentCommit === undefined) {
throw new IngestError(
knowledgeId,
"pull requires a previously-indexed commit; this knowledge has no commitId. Run github_index first.",
Expand Down Expand Up @@ -81,10 +86,10 @@ export async function runPull(msg: JobMessage<GithubPullPayload>): Promise<void>
await syncRepository(cloneOpts);

const branchHead = await readHeadCommitHash(repoDir);
if (branchHead === "unknown") {
if (!isFullCommitHash(branchHead)) {
throw new IngestError(knowledgeId, "could not resolve branch HEAD after clone");
}
const targetCommit = msg.payload.targetCommitHash ?? branchHead;
const targetCommit = (msg.payload.targetCommitHash ?? branchHead).toLowerCase();

if (targetCommit === currentCommit) {
logger.info(`pull: ${knowledgeId} already at ${targetCommit.slice(0, 12)}; no-op`);
Expand Down
6 changes: 5 additions & 1 deletion packages/ingest-github/src/pipeline/run.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Config, KnowledgeState, type GithubIndexPayload, type LocalIngestPayload } from "@bb/types";
import { Config, KnowledgeState, isFullCommitHash, type GithubIndexPayload, type LocalIngestPayload } from "@bb/types";
import { getConfigValue } from "@bb/config";
import { recordProcessingStats, setKnowledgeCommit, setKnowledgeState } from "@bb/mongo";
import { setKnowledgeStateInGraph } from "@bb/neo4j";
Expand Down Expand Up @@ -88,6 +88,10 @@ async function runGithub(
}
source = createDiskSourceReader({ repoDir, commitHash });
}
if (!isFullCommitHash(commitHash)) {
throw new IngestError(knowledgeId, `resolved HEAD is not a full commit hash: ${commitHash}`);
}
commitHash = commitHash.toLowerCase();

const metaPaths = metaPathsFor(knowledgeId);
await ensureMetaDirs(metaPaths);
Expand Down
38 changes: 33 additions & 5 deletions packages/mongo/src/knowledge.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { KnowledgeState, type KnowledgeDoc } from "@bb/types";
import {
KnowledgeState,
isFullCommitHash,
normalizeCommitHashes,
type KnowledgeDoc,
type KnowledgeSource,
} from "@bb/types";
import { KnowledgeNotFoundError } from "@bb/errors";
import { _getDb } from "./client.ts";
import { Collections } from "./collections.ts";
Expand All @@ -20,20 +26,24 @@ export async function setKnowledgeState(knowledgeId: string, state: KnowledgeSta

/**
* Records that this knowledge is now indexed at `commitHash`. Sets it as the
* current head pointer (`source.commitId`) and appends to the deduped history
* current indexed commit pointer (`source.commitId`) and appends to the deduped history
* array (`source.commitHashes`). Idempotent: re-recording the same commit is
* a no-op except for the `updatedAt` bump.
*
* Throws `KnowledgeNotFoundError` if the document doesn't exist.
*/
export async function setKnowledgeCommit(knowledgeId: string, commitHash: string): Promise<void> {
if (!isFullCommitHash(commitHash)) {
throw new Error(`invalid commit hash for knowledge ${knowledgeId}: ${commitHash}`);
}
const normalizedCommitHash = commitHash.toLowerCase();
const result = await _getDb()
.collection(Collections.Knowledge)
.updateOne(
{ knowledgeId },
{
$set: { "source.commitId": commitHash, updatedAt: new Date() },
$addToSet: { "source.commitHashes": commitHash },
$set: { "source.commitId": normalizedCommitHash, updatedAt: new Date() },
$addToSet: { "source.commitHashes": normalizedCommitHash },
},
Comment on lines 27 to 47
);
if (result.matchedCount === 0) {
Expand Down Expand Up @@ -61,13 +71,14 @@ export async function updateKnowledgeProgress(

export async function upsertKnowledge(doc: Omit<KnowledgeDoc, "updatedAt"> & { updatedAt?: Date }): Promise<void> {
const now = new Date();
const source = normalizeKnowledgeSourceForWrite(doc.knowledgeId, doc.source);
await _getDb()
.collection(Collections.Knowledge)
.updateOne(
{ knowledgeId: doc.knowledgeId },
{
$set: {
source: doc.source,
source,
status: doc.status,
updatedAt: doc.updatedAt ?? now,
},
Expand All @@ -80,6 +91,23 @@ export async function upsertKnowledge(doc: Omit<KnowledgeDoc, "updatedAt"> & { u
);
}

/**
 * Prepares a knowledge source for persistence.
 *
 * Non-github sources are returned untouched (same reference). For github
 * sources a fresh object is built containing only `kind`, `repoUrl` and the
 * optional `branch`, `commitId` and `commitHashes` fields; optional fields are
 * omitted entirely rather than written as `undefined`/empty.
 *
 * @param knowledgeId - Identifier used only to give the error message context.
 * @param source - The source as supplied by the caller.
 * @returns The source to write.
 * @throws Error when `source.commitId` is set but rejected by `isFullCommitHash`.
 */
function normalizeKnowledgeSourceForWrite(knowledgeId: string, source: KnowledgeSource): KnowledgeSource {
  if (source.kind !== "github") {
    return source;
  }

  const { repoUrl, branch, commitId } = source;
  if (commitId !== undefined && !isFullCommitHash(commitId)) {
    throw new Error(`invalid commit hash for knowledge ${knowledgeId}: ${commitId}`);
  }

  const history = normalizeCommitHashes(source.commitHashes);

  // Each optional field becomes either a one-key object or an empty object so
  // that absent values never appear as keys in the stored document.
  const branchField = branch === undefined ? {} : { branch };
  const commitField = commitId === undefined ? {} : { commitId: commitId.toLowerCase() };
  const historyField = history.length === 0 ? {} : { commitHashes: history };

  return { kind: "github", repoUrl, ...branchField, ...commitField, ...historyField };
}

export interface DeleteKnowledgeResult {
knowledgeDeleted: number;
rawDeleted: number;
Expand Down
16 changes: 8 additions & 8 deletions packages/server/src/githubPullRoute.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { Request, Response, Router } from "express";
import express from "express";
import { isFullCommitHash, resolveIndexedCommit } from "@bb/types";
import { getKnowledge } from "@bb/mongo";
import { enqueueGithubPull } from "@bb/queue";
import { fetchLatestCommitHash } from "@bb/ingest-github";
Expand All @@ -17,8 +18,6 @@ interface PullResponse {
commitHash?: string;
}

const COMMIT_HASH_RE = /^[0-9a-f]{40}$/u;

/**
* `POST /api/v1/github/pull` — re-index a github knowledge to a specific commit
* reachable from its indexed branch. When the caller omits `targetCommitHash`,
Expand Down Expand Up @@ -46,7 +45,7 @@ export function buildGithubPullRoute(): Router {
const gitToken = typeof body.gitToken === "string" && body.gitToken.length > 0 ? body.gitToken : undefined;
const suppliedTarget =
typeof body.targetCommitHash === "string" && body.targetCommitHash.length > 0 ? body.targetCommitHash : undefined;
if (suppliedTarget !== undefined && !COMMIT_HASH_RE.test(suppliedTarget)) {
if (suppliedTarget !== undefined && !isFullCommitHash(suppliedTarget)) {
res.status(400).json({
error: "invalid targetCommitHash",
message: "targetCommitHash must be a 40-character hex SHA",
Expand All @@ -63,7 +62,8 @@ export function buildGithubPullRoute(): Router {
res.status(422).json({ error: `pull is only supported for github knowledge (kind=${knowledge.source.kind})` });
return;
}
if (knowledge.source.commitId === undefined || knowledge.source.commitId.length === 0) {
const currentCommit = resolveIndexedCommit(knowledge.source);
if (currentCommit === undefined) {
res.status(422).json({
error: "knowledge not yet indexed",
message: "pull requires a previously-indexed commit; this knowledge has no commitId. Run github_index first.",
Expand All @@ -72,19 +72,19 @@ export function buildGithubPullRoute(): Router {
}

const branch = knowledge.source.branch ?? "main";
let targetCommit = suppliedTarget;
let targetCommit = suppliedTarget?.toLowerCase();
if (targetCommit === undefined) {
try {
const head = await fetchLatestCommitHash(knowledge.source.repoUrl, branch, gitToken);
if (head !== null && COMMIT_HASH_RE.test(head)) {
targetCommit = head;
if (isFullCommitHash(head)) {
targetCommit = head.toLowerCase();
}
} catch {
// Transient API failure; leave target unset and let the worker resolve via git rev-parse.
}
}

if (targetCommit !== undefined && targetCommit === knowledge.source.commitId) {
if (targetCommit !== undefined && targetCommit === currentCommit) {
const response: PullResponse = { knowledgeId, noOp: true, commitHash: targetCommit };
res.status(200).json(response);
return;
Expand Down
58 changes: 58 additions & 0 deletions packages/server/src/knowledgeSourcePresenter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { describe, expect, test } from "bun:test";
import type { KnowledgeSource } from "@bb/types";
import { getLegacyInfo, normalizeRepoSource } from "./knowledgeSourcePresenter.ts";

const HASH_A = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
const HASH_B = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";

// Suite covering how knowledge sources are shaped for API consumers via
// `normalizeRepoSource`, and how legacy `info` payloads are picked up via
// `getLegacyInfo`.
describe("knowledge source presentation", () => {
// Local sources must come back as the very same object (`toBe`, not `toEqual`).
test("preserves local sources unchanged", () => {
const source: KnowledgeSource = { kind: "local", sourcePath: "/tmp/repo" };
expect(normalizeRepoSource(source)).toBe(source);
});

// Input deliberately mixes an upper-case commitId, a non-hash string ("latest")
// and an object-shaped history entry; the expected output is lower-case hashes
// only, with the junk entry dropped and the object entry flattened to its hash.
test("normalizes current github source commit fields for /repos consumers", () => {
expect(
normalizeRepoSource({
kind: "github",
repoUrl: "https://github.com/ByteBell/bytebell-oss",
branch: "main",
commitId: HASH_A.toUpperCase(),
commitHashes: [HASH_A, "latest", { hash: HASH_B.toUpperCase() }],
}),
).toEqual({
kind: "github",
repoUrl: "https://github.com/ByteBell/bytebell-oss",
branch: "main",
commitId: HASH_A,
commitHashes: [HASH_A, HASH_B],
});
});

// Legacy shape: the source itself only carries `kind`, everything else lives in
// the separate `info` argument. The placeholder commitId "latest" must not
// surface; the expectation is that the reported commitId is HASH_B, which in
// this fixture is the last entry of the legacy history.
test("does not leak legacy commitId='latest' through /repos", () => {
// Cast is needed because the object omits fields of the github variant —
// presumably to mimic an old stored document; confirm against KnowledgeSource.
const legacySource = { kind: "github" } as KnowledgeSource;
const info = {
repoUrl: "https://github.com/ByteBell/bytebell-oss",
githubInfo: {
branchName: "main",
commitId: "latest",
commitHashes: [{ hash: HASH_A }, { hash: HASH_B }],
},
};

expect(normalizeRepoSource(legacySource, info)).toEqual({
kind: "github",
repoUrl: "https://github.com/ByteBell/bytebell-oss",
branch: "main",
commitId: HASH_B,
commitHashes: [HASH_A, HASH_B],
});
});

// `getLegacyInfo` should hand back the `info` value itself when it is an
// object, and `undefined` for `null` or a non-object such as a string.
test("extracts legacy info only from object-shaped entries", () => {
const info = { githubInfo: { commitId: HASH_A } };
expect(getLegacyInfo({ info })).toBe(info);
expect(getLegacyInfo({ info: null })).toBeUndefined();
expect(getLegacyInfo({ info: "not-object" })).toBeUndefined();
});
});
Loading
Loading