diff --git a/packages/cli/src/LsCommand.ts b/packages/cli/src/LsCommand.ts index c0f103e..b912bf8 100644 --- a/packages/cli/src/LsCommand.ts +++ b/packages/cli/src/LsCommand.ts @@ -1,5 +1,5 @@ import { Command } from "commander"; -import { Config } from "@bb/types"; +import { Config, normalizeCommitHashes, resolveIndexedCommit, type KnowledgeSource } from "@bb/types"; import { getConfigValue } from "@bb/config"; import { ensureServerRunning, ServerStartTimeoutError } from "./serverSpawn.ts"; import { getJson, HttpClientError } from "./httpClient.ts"; @@ -7,9 +7,7 @@ import { createSpinner, error } from "./output.ts"; interface RepoEntry { knowledgeId: string; - source: - | { kind: "github"; repoUrl: string; branch?: string; commitId?: string; commitHashes?: string[] } - | { kind: "local"; sourcePath: string }; + source: KnowledgeSource; state: string; createdAt: string; updatedAt: string; @@ -62,7 +60,7 @@ async function runLs(): Promise { } function renderTable(repos: RepoEntry[]): void { - const headers = ["ID", "SOURCE", "STATE", "UPDATED", "HEAD", "COMMITS", "FILES"]; + const headers = ["ID", "SOURCE", "STATE", "UPDATED", "COMMIT", "COMMITS", "FILES"]; const rows = repos.map((r) => [ `${r.knowledgeId.slice(0, 8)}…`, formatSource(r.source), @@ -86,17 +84,18 @@ function formatHead(source: RepoEntry["source"]): string { if (source.kind !== "github") { return "-"; } - if (source.commitId === undefined || source.commitId.length === 0) { + const commitId = resolveIndexedCommit(source); + if (commitId === undefined) { return "-"; } - return source.commitId.slice(0, 8); + return commitId.slice(0, 8); } function formatCommits(source: RepoEntry["source"]): string { if (source.kind !== "github") { return "-"; } - return String(source.commitHashes?.length ?? 0); + return String(normalizeCommitHashes(source.commitHashes).length); } function formatSource(source: RepoEntry["source"]): string { diff --git a/packages/cli/src/repoSelectorPrompt.ts b/packages/cli/src/repoSelectorPrompt.ts index e08e256..a136c62 100644 --- a/packages/cli/src/repoSelectorPrompt.ts +++ b/packages/cli/src/repoSelectorPrompt.ts @@ -1,5 +1,6 @@ import React from "react"; import { render } from "ink"; +import { normalizeCommitHashes, resolveIndexedCommit, type KnowledgeSource } from "@bb/types"; import { getJson } from "./httpClient.ts"; import { RepoSelector, @@ -26,9 +27,7 @@ import { export interface RepoListEntry { knowledgeId: string; - source: - | { kind: "github"; repoUrl: string; branch?: string; commitId?: string; commitHashes?: string[] } - | { kind: "local"; sourcePath: string }; + source: KnowledgeSource; state: string; createdAt: string; updatedAt: string; @@ -107,12 +106,10 @@ function formatDetail(repo: RepoListEntry): string { if (repo.source.kind !== "github") { return `${repo.state} ${idChunk} ${repo.fileCount} files`; } - const head = - repo.source.commitId !== undefined && repo.source.commitId.length > 0 - ? `head=${repo.source.commitId.slice(0, 8)}` - : "head=-"; - const commits = `${repo.source.commitHashes?.length ?? 0} commits`; - return `${repo.state} ${idChunk} ${head} ${commits} ${repo.fileCount} files`; + const commitId = resolveIndexedCommit(repo.source); + const commit = commitId !== undefined ? `commit=${commitId.slice(0, 8)}` : "commit=-"; + const commits = `${normalizeCommitHashes(repo.source.commitHashes).length} commits`; + return `${repo.state} ${idChunk} ${commit} ${commits} ${repo.fileCount} files`; } function formatSourceLabel(source: RepoListEntry["source"]): string { diff --git a/packages/ingest-github/src/pipeline/pull.ts b/packages/ingest-github/src/pipeline/pull.ts index 2ce7452..ddd02a2 100644 --- a/packages/ingest-github/src/pipeline/pull.ts +++ b/packages/ingest-github/src/pipeline/pull.ts @@ -1,4 +1,11 @@ -import { Config, KnowledgeState, type GithubPullPayload, type JobMessage } from "@bb/types"; +import { + Config, + KnowledgeState, + isFullCommitHash, + resolveIndexedCommit, + type GithubPullPayload, + type JobMessage, +} from "@bb/types"; import { getConfigValue } from "@bb/config"; import { getKnowledge, recordProcessingStats, setKnowledgeCommit, setKnowledgeState } from "@bb/mongo"; import { setKnowledgeStateInGraph, snapshotFilesToVersion, type NodeScope } from "@bb/neo4j"; @@ -25,8 +32,6 @@ import { buildFileAnalysisUserPrompt, } from "src/strategies/flat-folder/prompts/file-analysis.ts"; -const COMMIT_HASH_RE = /^[0-9a-f]{40}$/u; - function resolveOrgId(payload: { orgId?: string }): string { if (typeof payload.orgId === "string" && payload.orgId.length > 0) { return payload.orgId; @@ -36,7 +41,7 @@ function resolveOrgId(payload: { orgId?: string }): string { export async function runPull(msg: JobMessage): Promise { const { knowledgeId } = msg.payload; - if (msg.payload.targetCommitHash !== undefined && !COMMIT_HASH_RE.test(msg.payload.targetCommitHash)) { + if (msg.payload.targetCommitHash !== undefined && !isFullCommitHash(msg.payload.targetCommitHash)) { throw new IngestError( knowledgeId, `targetCommitHash must be a 40-character hex SHA, got: ${msg.payload.targetCommitHash}`, @@ -50,8 +55,8 @@ export async function runPull(msg: JobMessage): Promise if (knowledge.source.kind !== "github") { throw new IngestError(knowledgeId, `pull is only supported for github knowledge (kind=${knowledge.source.kind})`); } - const currentCommit = knowledge.source.commitId ?? ""; - if (currentCommit.length === 0) { + const currentCommit = resolveIndexedCommit(knowledge.source); + if (currentCommit === undefined) { throw new IngestError( knowledgeId, "pull requires a previously-indexed commit; this knowledge has no commitId. Run github_index first.", @@ -81,10 +86,10 @@ export async function runPull(msg: JobMessage): Promise await syncRepository(cloneOpts); const branchHead = await readHeadCommitHash(repoDir); - if (branchHead === "unknown") { + if (!isFullCommitHash(branchHead)) { throw new IngestError(knowledgeId, "could not resolve branch HEAD after clone"); } - const targetCommit = msg.payload.targetCommitHash ?? branchHead; + const targetCommit = (msg.payload.targetCommitHash ?? branchHead).toLowerCase(); if (targetCommit === currentCommit) { logger.info(`pull: ${knowledgeId} already at ${targetCommit.slice(0, 12)}; no-op`); diff --git a/packages/ingest-github/src/pipeline/run.ts b/packages/ingest-github/src/pipeline/run.ts index 5d76146..66b6ede 100644 --- a/packages/ingest-github/src/pipeline/run.ts +++ b/packages/ingest-github/src/pipeline/run.ts @@ -1,4 +1,4 @@ -import { Config, KnowledgeState, type GithubIndexPayload, type LocalIngestPayload } from "@bb/types"; +import { Config, KnowledgeState, isFullCommitHash, type GithubIndexPayload, type LocalIngestPayload } from "@bb/types"; import { getConfigValue } from "@bb/config"; import { recordProcessingStats, setKnowledgeCommit, setKnowledgeState } from "@bb/mongo"; import { setKnowledgeStateInGraph } from "@bb/neo4j"; @@ -88,6 +88,10 @@ async function runGithub( } source = createDiskSourceReader({ repoDir, commitHash }); } + if (!isFullCommitHash(commitHash)) { + throw new IngestError(knowledgeId, `resolved HEAD is not a full commit hash: ${commitHash}`); + } + commitHash = commitHash.toLowerCase(); const metaPaths = metaPathsFor(knowledgeId); await ensureMetaDirs(metaPaths); diff --git a/packages/mongo/src/knowledge.ts b/packages/mongo/src/knowledge.ts index c1bfb15..f6f7aeb 100644 --- a/packages/mongo/src/knowledge.ts +++ b/packages/mongo/src/knowledge.ts @@ -1,4 +1,10 @@ -import { KnowledgeState, type KnowledgeDoc } from "@bb/types"; +import { + KnowledgeState, + isFullCommitHash, + normalizeCommitHashes, + type KnowledgeDoc, + type KnowledgeSource, +} from "@bb/types"; import { KnowledgeNotFoundError } from "@bb/errors"; import { _getDb } from "./client.ts"; import { Collections } from "./collections.ts"; @@ -20,20 +26,24 @@ export async function setKnowledgeState(knowledgeId: string, state: KnowledgeSta /** * Records that this knowledge is now indexed at `commitHash`. Sets it as the - * current head pointer (`source.commitId`) and appends to the deduped history + * current indexed commit pointer (`source.commitId`) and appends to the deduped history * array (`source.commitHashes`). Idempotent: re-recording the same commit is * a no-op except for the `updatedAt` bump. * * Throws `KnowledgeNotFoundError` if the document doesn't exist. */ export async function setKnowledgeCommit(knowledgeId: string, commitHash: string): Promise { + if (!isFullCommitHash(commitHash)) { + throw new Error(`invalid commit hash for knowledge ${knowledgeId}: ${commitHash}`); + } + const normalizedCommitHash = commitHash.toLowerCase(); const result = await _getDb() .collection(Collections.Knowledge) .updateOne( { knowledgeId }, { - $set: { "source.commitId": commitHash, updatedAt: new Date() }, - $addToSet: { "source.commitHashes": commitHash }, + $set: { "source.commitId": normalizedCommitHash, updatedAt: new Date() }, + $addToSet: { "source.commitHashes": normalizedCommitHash }, }, ); if (result.matchedCount === 0) { @@ -61,13 +71,14 @@ export async function updateKnowledgeProgress( export async function upsertKnowledge(doc: Omit & { updatedAt?: Date }): Promise { const now = new Date(); + const source = normalizeKnowledgeSourceForWrite(doc.knowledgeId, doc.source); await _getDb() .collection(Collections.Knowledge) .updateOne( { knowledgeId: doc.knowledgeId }, { $set: { - source: doc.source, + source, status: doc.status, updatedAt: doc.updatedAt ?? now, }, @@ -80,6 +91,23 @@ export async function upsertKnowledge(doc: Omit & { u ); } +function normalizeKnowledgeSourceForWrite(knowledgeId: string, source: KnowledgeSource): KnowledgeSource { + if (source.kind !== "github") { + return source; + } + if (source.commitId !== undefined && !isFullCommitHash(source.commitId)) { + throw new Error(`invalid commit hash for knowledge ${knowledgeId}: ${source.commitId}`); + } + const commitHashes = normalizeCommitHashes(source.commitHashes); + return { + kind: "github", + repoUrl: source.repoUrl, + ...(source.branch !== undefined ? { branch: source.branch } : {}), + ...(source.commitId !== undefined ? { commitId: source.commitId.toLowerCase() } : {}), + ...(commitHashes.length > 0 ? { commitHashes } : {}), + }; +} + export interface DeleteKnowledgeResult { knowledgeDeleted: number; rawDeleted: number; diff --git a/packages/server/src/githubPullRoute.ts b/packages/server/src/githubPullRoute.ts index f80d79f..30e9ac1 100644 --- a/packages/server/src/githubPullRoute.ts +++ b/packages/server/src/githubPullRoute.ts @@ -1,5 +1,6 @@ import type { Request, Response, Router } from "express"; import express from "express"; +import { isFullCommitHash, resolveIndexedCommit } from "@bb/types"; import { getKnowledge } from "@bb/mongo"; import { enqueueGithubPull } from "@bb/queue"; import { fetchLatestCommitHash } from "@bb/ingest-github"; @@ -17,8 +18,6 @@ interface PullResponse { commitHash?: string; } -const COMMIT_HASH_RE = /^[0-9a-f]{40}$/u; - /** * `POST /api/v1/github/pull` — re-index a github knowledge to a specific commit * reachable from its indexed branch. When the caller omits `targetCommitHash`, @@ -46,7 +45,7 @@ export function buildGithubPullRoute(): Router { const gitToken = typeof body.gitToken === "string" && body.gitToken.length > 0 ? body.gitToken : undefined; const suppliedTarget = typeof body.targetCommitHash === "string" && body.targetCommitHash.length > 0 ? body.targetCommitHash : undefined; - if (suppliedTarget !== undefined && !COMMIT_HASH_RE.test(suppliedTarget)) { + if (suppliedTarget !== undefined && !isFullCommitHash(suppliedTarget)) { res.status(400).json({ error: "invalid targetCommitHash", message: "targetCommitHash must be a 40-character hex SHA", @@ -63,7 +62,8 @@ export function buildGithubPullRoute(): Router { res.status(422).json({ error: `pull is only supported for github knowledge (kind=${knowledge.source.kind})` }); return; } - if (knowledge.source.commitId === undefined || knowledge.source.commitId.length === 0) { + const currentCommit = resolveIndexedCommit(knowledge.source); + if (currentCommit === undefined) { res.status(422).json({ error: "knowledge not yet indexed", message: "pull requires a previously-indexed commit; this knowledge has no commitId. Run github_index first.", @@ -72,19 +72,19 @@ export function buildGithubPullRoute(): Router { } const branch = knowledge.source.branch ?? "main"; - let targetCommit = suppliedTarget; + let targetCommit = suppliedTarget?.toLowerCase(); if (targetCommit === undefined) { try { const head = await fetchLatestCommitHash(knowledge.source.repoUrl, branch, gitToken); - if (head !== null && COMMIT_HASH_RE.test(head)) { - targetCommit = head; + if (isFullCommitHash(head)) { + targetCommit = head.toLowerCase(); } } catch { // Transient API failure; leave target unset and let the worker resolve via git rev-parse. } } - if (targetCommit !== undefined && targetCommit === knowledge.source.commitId) { + if (targetCommit !== undefined && targetCommit === currentCommit) { const response: PullResponse = { knowledgeId, noOp: true, commitHash: targetCommit }; res.status(200).json(response); return; diff --git a/packages/server/src/knowledgeSourcePresenter.test.ts b/packages/server/src/knowledgeSourcePresenter.test.ts new file mode 100644 index 0000000..4adf4ca --- /dev/null +++ b/packages/server/src/knowledgeSourcePresenter.test.ts @@ -0,0 +1,58 @@ +import { describe, expect, test } from "bun:test"; +import type { KnowledgeSource } from "@bb/types"; +import { getLegacyInfo, normalizeRepoSource } from "./knowledgeSourcePresenter.ts"; + +const HASH_A = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; +const HASH_B = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + +describe("knowledge source presentation", () => { + test("preserves local sources unchanged", () => { + const source: KnowledgeSource = { kind: "local", sourcePath: "/tmp/repo" }; + expect(normalizeRepoSource(source)).toBe(source); + }); + + test("normalizes current github source commit fields for /repos consumers", () => { + expect( + normalizeRepoSource({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + branch: "main", + commitId: HASH_A.toUpperCase(), + commitHashes: [HASH_A, "latest", { hash: HASH_B.toUpperCase() }], + }), + ).toEqual({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + branch: "main", + commitId: HASH_A, + commitHashes: [HASH_A, HASH_B], + }); + }); + + test("does not leak legacy commitId='latest' through /repos", () => { + const legacySource = { kind: "github" } as KnowledgeSource; + const info = { + repoUrl: "https://github.com/ByteBell/bytebell-oss", + githubInfo: { + branchName: "main", + commitId: "latest", + commitHashes: [{ hash: HASH_A }, { hash: HASH_B }], + }, + }; + + expect(normalizeRepoSource(legacySource, info)).toEqual({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + branch: "main", + commitId: HASH_B, + commitHashes: [HASH_A, HASH_B], + }); + }); + + test("extracts legacy info only from object-shaped entries", () => { + const info = { githubInfo: { commitId: HASH_A } }; + expect(getLegacyInfo({ info })).toBe(info); + expect(getLegacyInfo({ info: null })).toBeUndefined(); + expect(getLegacyInfo({ info: "not-object" })).toBeUndefined(); + }); +}); diff --git a/packages/server/src/knowledgeSourcePresenter.ts b/packages/server/src/knowledgeSourcePresenter.ts new file mode 100644 index 0000000..e32603c --- /dev/null +++ b/packages/server/src/knowledgeSourcePresenter.ts @@ -0,0 +1,60 @@ +import { normalizeCommitHashes, resolveIndexedCommit, type KnowledgeSource } from "@bb/types"; + +export interface LegacyKnowledgeInfo { + repoUrl?: unknown; + branch?: unknown; + git_url?: unknown; + githubInfo?: { commitId?: unknown; commitHashes?: unknown; branchName?: unknown }; +} + +export function getLegacyInfo(entry: unknown): LegacyKnowledgeInfo | undefined { + if (typeof entry !== "object" || entry === null) { + return undefined; + } + const info = (entry as { info?: unknown }).info; + return typeof info === "object" && info !== null ? (info as LegacyKnowledgeInfo) : undefined; +} + +export function normalizeRepoSource(source: KnowledgeSource, info?: LegacyKnowledgeInfo): KnowledgeSource { + if (source.kind !== "github") { + return source; + } + const sourceRecord = source as { repoUrl?: unknown; branch?: unknown }; + const commitHashes = normalizeCommitHashes(source.commitHashes); + const fallbackCommitHashes = normalizeCommitHashes(info?.githubInfo?.commitHashes); + const resolvedCommitHashes = commitHashes.length > 0 ? commitHashes : fallbackCommitHashes; + const fallbackCommitId = typeof info?.githubInfo?.commitId === "string" ? info.githubInfo.commitId : undefined; + const commitId = resolveIndexedCommit({ + kind: "github", + repoUrl: "", + ...(source.commitId !== undefined + ? { commitId: source.commitId } + : fallbackCommitId !== undefined + ? { commitId: fallbackCommitId } + : {}), + commitHashes: resolvedCommitHashes, + }); + const repoUrl = + typeof sourceRecord.repoUrl === "string" + ? sourceRecord.repoUrl + : typeof info?.repoUrl === "string" + ? info.repoUrl + : typeof info?.git_url === "string" + ? info.git_url + : ""; + const branch = + typeof sourceRecord.branch === "string" + ? sourceRecord.branch + : typeof info?.branch === "string" + ? info.branch + : typeof info?.githubInfo?.branchName === "string" + ? info.githubInfo.branchName + : undefined; + return { + kind: "github", + repoUrl, + ...(branch !== undefined ? { branch } : {}), + ...(commitId !== undefined ? { commitId } : {}), + commitHashes: resolvedCommitHashes, + }; +} diff --git a/packages/server/src/reposRoute.ts b/packages/server/src/reposRoute.ts index 3343e42..42c93e5 100644 --- a/packages/server/src/reposRoute.ts +++ b/packages/server/src/reposRoute.ts @@ -1,6 +1,7 @@ import type { Request, Response, Router } from "express"; import express from "express"; import { getKnowledge, listKnowledge } from "@bb/mongo"; +import { getLegacyInfo, normalizeRepoSource } from "./knowledgeSourcePresenter.ts"; export function buildReposRoute(): Router { const router = express.Router(); @@ -8,7 +9,7 @@ export function buildReposRoute(): Router { const entries = await listKnowledge(); const repos = entries.map((e) => ({ knowledgeId: e.knowledgeId, - source: e.source, + source: normalizeRepoSource(e.source, getLegacyInfo(e)), state: e.status.state, createdAt: e.createdAt instanceof Date ? e.createdAt.toISOString() : new Date(e.createdAt).toISOString(), updatedAt: e.updatedAt instanceof Date ? e.updatedAt.toISOString() : new Date(e.updatedAt).toISOString(), @@ -30,7 +31,7 @@ export function buildReposRoute(): Router { } res.status(200).json({ knowledgeId: entry.knowledgeId, - source: entry.source, + source: normalizeRepoSource(entry.source, getLegacyInfo(entry)), state: entry.status.state, createdAt: entry.createdAt instanceof Date ? entry.createdAt.toISOString() : new Date(entry.createdAt).toISOString(), diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index e6ccf57..20b88e5 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -2,7 +2,15 @@ export { Config } from "./config.ts"; export { JobType, JobPriority } from "./job.ts"; export type { GithubIndexPayload, GithubPullPayload, LocalIngestPayload, JobMessage, PayloadFor } from "./job.ts"; export { KnowledgeState } from "./knowledge.ts"; -export type { GithubKnowledgeSource, KnowledgeDoc, KnowledgeSource, LocalKnowledgeSource } from "./knowledge.ts"; +export { isFullCommitHash, normalizeCommitHashes, resolveIndexedCommit } from "./knowledge.ts"; +export type { + CommitHashEntry, + CommitHashRecord, + GithubKnowledgeSource, + KnowledgeDoc, + KnowledgeSource, + LocalKnowledgeSource, +} from "./knowledge.ts"; export type { ModelTokenBreakdown, ModelTokenUsage, diff --git a/packages/types/src/knowledge.test.ts b/packages/types/src/knowledge.test.ts new file mode 100644 index 0000000..0cb86e8 --- /dev/null +++ b/packages/types/src/knowledge.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, test } from "bun:test"; +import { isFullCommitHash, normalizeCommitHashes, resolveIndexedCommit } from "./knowledge.ts"; + +const HASH_A = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; +const HASH_B = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; +const HASH_C_UPPER = "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"; + +describe("commit hash helpers", () => { + test("accepts only full 40-character commit hashes", () => { + expect(isFullCommitHash(HASH_A)).toBe(true); + expect(isFullCommitHash(HASH_C_UPPER)).toBe(true); + expect(isFullCommitHash("latest")).toBe(false); + expect(isFullCommitHash("deadbee")).toBe(false); + expect(isFullCommitHash("g".repeat(40))).toBe(false); + }); + + test("normalizes legacy string and object commit history", () => { + expect( + normalizeCommitHashes([ + null, + undefined, + HASH_A, + HASH_A.toUpperCase(), + { hash: HASH_B, inputTokens: "10", outputTokens: "5", costUsd: "0.01" }, + { hash: HASH_C_UPPER }, + "latest", + "deadbee", + { hash: "" }, + {}, + ]), + ).toEqual([HASH_A, HASH_B, HASH_C_UPPER.toLowerCase()]); + expect(normalizeCommitHashes(null)).toEqual([]); + expect(normalizeCommitHashes(undefined)).toEqual([]); + }); + + test("resolves the indexed commit from commitId first", () => { + expect( + resolveIndexedCommit({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + commitId: HASH_A, + commitHashes: [HASH_B], + }), + ).toBe(HASH_A); + }); + + test("falls back to the newest valid history entry when commitId is not a hash", () => { + expect( + resolveIndexedCommit({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + commitId: "latest", + commitHashes: [HASH_A, { hash: HASH_B }], + }), + ).toBe(HASH_B); + }); + + test("returns undefined when no valid commit hash was recorded", () => { + expect( + resolveIndexedCommit({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + }), + ).toBeUndefined(); + expect( + resolveIndexedCommit({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + commitHashes: [], + }), + ).toBeUndefined(); + expect( + resolveIndexedCommit({ + kind: "github", + repoUrl: "https://github.com/ByteBell/bytebell-oss", + commitId: "latest", + commitHashes: ["deadbee", { hash: "" }], + }), + ).toBeUndefined(); + }); +}); diff --git a/packages/types/src/knowledge.ts b/packages/types/src/knowledge.ts index a6e1309..a54d6da 100644 --- a/packages/types/src/knowledge.ts +++ b/packages/types/src/knowledge.ts @@ -7,14 +7,25 @@ export enum KnowledgeState { Failed = "FAILED", } +const FULL_COMMIT_HASH_RE = /^[0-9a-f]{40}$/iu; + +export interface CommitHashRecord { + hash: string; + inputTokens?: string; + outputTokens?: string; + costUsd?: string; +} + +export type CommitHashEntry = string | CommitHashRecord; + export interface GithubKnowledgeSource { kind: "github"; repoUrl: string; branch?: string; - /** Current head pointer — the most recently indexed commit. */ + /** Current indexed commit pointer. */ commitId?: string; - /** Every commit this knowledge has been indexed at, oldest → newest. Pull appends to this list. */ - commitHashes?: string[]; + /** Every commit this knowledge has been indexed at, oldest to newest. Pull appends to this list. */ + commitHashes?: CommitHashEntry[]; } export interface LocalKnowledgeSource { @@ -31,3 +42,39 @@ export interface KnowledgeDoc { createdAt: Date; updatedAt: Date; } + +export function isFullCommitHash(value: unknown): value is string { + return typeof value === "string" && FULL_COMMIT_HASH_RE.test(value); +} + +export function normalizeCommitHashes(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + + const hashes: string[] = []; + const seen = new Set(); + for (const item of value) { + const hash = + typeof item === "string" + ? item + : typeof item === "object" && item !== null && typeof (item as { hash?: unknown }).hash === "string" + ? (item as { hash: string }).hash + : ""; + if (isFullCommitHash(hash)) { + const normalized = hash.toLowerCase(); + if (!seen.has(normalized)) { + seen.add(normalized); + hashes.push(normalized); + } + } + } + return hashes; +} + +export function resolveIndexedCommit(source: GithubKnowledgeSource): string | undefined { + if (isFullCommitHash(source.commitId)) { + return source.commitId.toLowerCase(); + } + return normalizeCommitHashes(source.commitHashes).at(-1); +}