diff --git a/Makefile.cbm b/Makefile.cbm index d821cb76..ca195f10 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -214,6 +214,9 @@ TRACES_SRCS = src/traces/traces.c # Watcher module (new) WATCHER_SRCS = src/watcher/watcher.c +# Git context module (new) +GIT_SRCS = src/git/git_context.c + # CLI module (new) CLI_SRCS = src/cli/cli.c src/cli/progress_sink.c src/cli/hook_augment.c @@ -256,7 +259,7 @@ TRE_CFLAGS = -std=c11 -g -O1 -w -Ivendored/tre YYJSON_SRC = vendored/yyjson/yyjson.c # All production sources -PROD_SRCS = $(FOUNDATION_SRCS) $(STORE_SRCS) $(CYPHER_SRCS) $(MCP_SRCS) $(DISCOVER_SRCS) $(GRAPH_BUFFER_SRCS) $(PIPELINE_SRCS) $(SIMHASH_SRCS) $(SEMANTIC_SRCS) $(TRACES_SRCS) $(WATCHER_SRCS) $(CLI_SRCS) $(UI_SRCS) $(YYJSON_SRC) +PROD_SRCS = $(FOUNDATION_SRCS) $(STORE_SRCS) $(CYPHER_SRCS) $(MCP_SRCS) $(DISCOVER_SRCS) $(GRAPH_BUFFER_SRCS) $(PIPELINE_SRCS) $(SIMHASH_SRCS) $(SEMANTIC_SRCS) $(TRACES_SRCS) $(WATCHER_SRCS) $(GIT_SRCS) $(CLI_SRCS) $(UI_SRCS) $(YYJSON_SRC) EXISTING_C_SRCS = $(EXTRACTION_SRCS) $(LSP_SRCS) $(TS_RUNTIME_SRC) \ $(GRAMMAR_SRCS) $(AC_LZ4_SRCS) $(ZSTD_SRCS) $(SQLITE_WRITER_SRC) diff --git a/scripts/security-allowlist.txt b/scripts/security-allowlist.txt index 0acad067..814ba649 100644 --- a/scripts/security-allowlist.txt +++ b/scripts/security-allowlist.txt @@ -22,6 +22,10 @@ src/watcher/watcher.c:cbm_popen:git working tree status (git_is_dirty) src/watcher/watcher.c:cbm_popen:git file count (git_file_count) src/watcher/watcher.c:popen:via cbm_popen wrapper calls +# ── Git context: git metadata resolution (repo paths validated via cbm_validate_shell_arg) ── +src/git/git_context.c:cbm_popen:git rev-parse/symbolic-ref/merge-base metadata lookup +src/git/git_context.c:popen:via cbm_popen wrapper call + # ── MCP server: search and change detection ──────────────────────────────── src/mcp/mcp.c:cbm_popen:search_code via grep (pattern in temp file, path validated) src/mcp/mcp.c:cbm_popen:detect_changes via git diff (args validated) diff --git 
a/src/git/git_context.c b/src/git/git_context.c
new file mode 100644
index 00000000..5f27b9f2
--- /dev/null
+++ b/src/git/git_context.c
@@ -0,0 +1,427 @@
+#include "git/git_context.h"
+
+#include "foundation/compat_fs.h"
+#include "foundation/constants.h"
+#include "foundation/str_util.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+/* Fixed buffer sizes for the git command line and its captured output line. */
+enum {
+    GIT_CMD_MAX = 1024,
+    GIT_OUTPUT_MAX = 4096,
+};
+
+/* Heap-copy `s`; NULL is copied as "". Returns NULL on allocation failure. */
+static char *git_strdup(const char *s) {
+    if (!s) {
+        s = "";
+    }
+    size_t n = strlen(s) + 1;
+    char *out = (char *)malloc(n);
+    if (!out) {
+        return NULL;
+    }
+    memcpy(out, s, n);
+    return out;
+}
+
+/* Strip trailing CR/LF characters in place. */
+static void trim_newlines(char *s) {
+    if (!s) {
+        return;
+    }
+    size_t n = strlen(s);
+    while (n > 0 && (s[n - 1] == '\n' || s[n - 1] == '\r')) {
+        s[--n] = '\0';
+    }
+}
+
+/* True when `repo_path` may be interpolated into a double-quoted shell argument.
+ * On Windows, cmd.exe expansion characters (% ! ^) are rejected as well. */
+static bool git_validate_repo_path(const char *repo_path) {
+    if (!cbm_validate_shell_arg(repo_path)) {
+        return false;
+    }
+#ifdef _WIN32
+    for (const char *p = repo_path; *p; p++) {
+        if (*p == '%' || *p == '!' || *p == '^') {
+            return false;
+        }
+    }
+#endif
+    return true;
+}
+
+/* Run `git -C <repo_path> <git_args>` with stderr discarded and store the first
+ * output line, trailing newlines trimmed, in *out (heap; caller frees).
+ * Returns 0 on success. Returns CBM_NOT_FOUND, leaving *out NULL, when the path
+ * fails validation, the command cannot be built or started, git prints nothing
+ * or exits non-zero, or the line is empty. `git_args` is interpolated unescaped
+ * and must be a trusted literal. */
+static int git_capture(const char *repo_path, const char *git_args, char **out) {
+    if (!out) {
+        return CBM_NOT_FOUND;
+    }
+    *out = NULL;
+    if (!repo_path || !git_args || !git_validate_repo_path(repo_path)) {
+        return CBM_NOT_FOUND;
+    }
+
+    char cmd[GIT_CMD_MAX];
+#ifdef _WIN32
+    const char *null_dev = "NUL";
+#else
+    const char *null_dev = "/dev/null";
+#endif
+    /* Double quotes work for POSIX shells and cmd.exe. cbm_validate_shell_arg()
+     * rejects quote/backslash/substitution metacharacters before interpolation. */
+    int n = snprintf(cmd, sizeof(cmd), "git -C \"%s\" %s 2>%s", repo_path, git_args, null_dev);
+    if (n < 0 || n >= (int)sizeof(cmd)) {
+        return CBM_NOT_FOUND;
+    }
+
+    FILE *fp = cbm_popen(cmd, "r");
+    if (!fp) {
+        return CBM_NOT_FOUND;
+    }
+
+    char buf[GIT_OUTPUT_MAX];
+    if (!fgets(buf, sizeof(buf), fp)) {
+        cbm_pclose(fp);
+        return CBM_NOT_FOUND;
+    }
+    trim_newlines(buf);
+
+    int rc = cbm_pclose(fp);
+    if (rc != 0 || buf[0] == '\0') {
+        return CBM_NOT_FOUND;
+    }
+
+    *out = git_strdup(buf);
+    return *out ? 0 : CBM_NOT_FOUND;
+}
+
+/* True for paths rooted at '/', plus drive-letter ("C:") and backslash-rooted
+ * paths on Windows. */
+static bool path_is_absolute(const char *path) {
+    if (!path || !path[0]) {
+        return false;
+    }
+    if (path[0] == '/') {
+        return true;
+    }
+#ifdef _WIN32
+    return path[0] == '\\' || (isalpha((unsigned char)path[0]) && path[1] == ':');
+#else
+    return false;
+#endif
+}
+
+/* Return a heap string "<root>/<rel>", or a copy of `rel` when root is NULL/empty. */
+static char *join_root_relative(const char *root, const char *rel) {
+    if (!root || !root[0]) {
+        return git_strdup(rel);
+    }
+    int n = snprintf(NULL, 0, "%s/%s", root, rel);
+    if (n < 0) {
+        return NULL;
+    }
+    char *out = (char *)malloc((size_t)n + 1);
+    if (!out) {
+        return NULL;
+    }
+    snprintf(out, (size_t)n + 1, "%s/%s", root, rel);
+    return out;
+}
+
+/* Derive the checkout root shared by all worktrees of a repository: take the
+ * git common dir (falling back to the worktree root when it is empty), make it
+ * absolute by joining relative values onto `worktree_root`, then strip trailing
+ * separators and a final "/.git" component. Returns a heap string, NULL on
+ * allocation failure. */
+static char *derive_canonical_root(const char *worktree_root, const char *git_common_dir) {
+    const char *src = git_common_dir && git_common_dir[0] ? git_common_dir : worktree_root;
+    if (!src) {
+        return git_strdup("");
+    }
+
+    char *root = path_is_absolute(src) ? git_strdup(src) : join_root_relative(worktree_root, src);
+    if (!root) {
+        return NULL;
+    }
+
+    size_t len = strlen(root);
+    while (len > 1 && (root[len - 1] == '/' || root[len - 1] == '\\')) {
+        root[--len] = '\0';
+    }
+
+    if (len >= 5 && strcmp(root + len - 5, "/.git") == 0) {
+        root[len - 5] = '\0';
+    }
+#ifdef _WIN32
+    else if (len >= 5 && strcmp(root + len - 5, "\\.git") == 0) {
+        root[len - 5] = '\0';
+    }
+#endif
+
+    return root;
+}
+
+/* Build a slug from a branch name: [A-Za-z0-9._-] are kept, each run of other
+ * characters becomes a single '-', and leading/trailing dashes are dropped.
+ * Detached or unnamed checkouts, and names with nothing left, yield the fallback
+ * ("detached" / "working-tree"). Returns a heap string, NULL on allocation failure. */
+static char *slug_from_branch(const char *branch, bool detached) {
+    const char *fallback = detached ? "detached" : "working-tree";
+    const char *src = detached ? fallback : (branch && branch[0] ? branch : fallback);
+    size_t len = strlen(src);
+    char *slug = (char *)malloc(len + 1);
+    if (!slug) {
+        return NULL;
+    }
+
+    size_t j = 0;
+    bool in_dash = false;
+    for (size_t i = 0; i < len; i++) {
+        unsigned char c = (unsigned char)src[i];
+        if (isalnum(c) || c == '-' || c == '_' || c == '.') {
+            if (j == 0 && c == '-') {
+                in_dash = true;
+                continue;
+            }
+            slug[j++] = (char)c;
+            in_dash = false;
+        } else if (j > 0 && !in_dash) {
+            slug[j++] = '-';
+            in_dash = true;
+        }
+    }
+    while (j > 0 && slug[j - 1] == '-') {
+        j--;
+    }
+    slug[j] = '\0';
+
+    if (slug[0] == '\0') {
+        free(slug);
+        return git_strdup(fallback);
+    }
+    return slug;
+}
+
+/* Release every string owned by `ctx` and zero the struct. NULL is a no-op. */
+void cbm_git_context_free(cbm_git_context_t *ctx) {
+    if (!ctx) {
+        return;
+    }
+    free(ctx->input_path);
+    free(ctx->worktree_root);
+    free(ctx->git_dir);
+    free(ctx->git_common_dir);
+    free(ctx->canonical_root);
+    free(ctx->branch);
+    free(ctx->branch_slug);
+    free(ctx->head_sha);
+    free(ctx->base_sha);
+    memset(ctx, 0, sizeof(*ctx));
+}
+
+/* Resolve git metadata for `path` into *out (zeroed first; release it with
+ * cbm_git_context_free()).
+ *
+ * Returns 0 when the lookup ran, including for a missing path (root_exists
+ * false) or a directory outside any work tree (is_git false); only input_path
+ * is set in those cases. Inside a work tree every string field is set, with ""
+ * for values git could not report and "DETACHED" as the branch of a detached
+ * HEAD. Returns CBM_NOT_FOUND on NULL/empty arguments or a failed allocation. */
+int cbm_git_context_resolve(const char *path, cbm_git_context_t *out) {
+    if (!out) {
+        return CBM_NOT_FOUND;
+    }
+
+    memset(out, 0, sizeof(*out));
+    if (!path || !path[0]) {
+        return CBM_NOT_FOUND;
+    }
+
+    out->input_path = git_strdup(path);
+    if (!out->input_path) {
+        return CBM_NOT_FOUND;
+    }
+
+    struct stat st;
+    out->root_exists = (stat(path, &st) == 0);
+    if (!out->root_exists) {
+        return 0;
+    }
+
+    if (git_capture(path, "rev-parse --show-toplevel", &out->worktree_root) != 0) {
+        out->is_git = false;
+        return 0;
+    }
+    out->is_git = true;
+
+    /* Relative rev-parse paths are relative to git's working directory. Query
+     * from the worktree root so both values are spelled the same way and match
+     * the base derive_canonical_root() joins against, even when `path` is a
+     * subdirectory of the checkout. */
+    const char *query_dir =
+        git_validate_repo_path(out->worktree_root) ? out->worktree_root : path;
+
+    if (git_capture(query_dir, "rev-parse --git-dir", &out->git_dir) != 0) {
+        out->git_dir = git_strdup("");
+    }
+    if (git_capture(query_dir, "rev-parse --git-common-dir", &out->git_common_dir) != 0) {
+        out->git_common_dir = git_strdup("");
+    }
+    if (git_capture(path, "rev-parse --verify HEAD", &out->head_sha) != 0) {
+        out->head_sha = git_strdup("");
+    }
+
+    if (git_capture(path, "symbolic-ref --quiet --short HEAD", &out->branch) != 0) {
+        out->branch = git_strdup("DETACHED");
+        out->is_detached = true;
+    }
+
+    out->is_worktree =
+        out->git_dir && out->git_common_dir && strcmp(out->git_dir, out->git_common_dir) != 0;
+    out->canonical_root = derive_canonical_root(out->worktree_root, out->git_common_dir);
+    out->branch_slug = slug_from_branch(out->branch, out->is_detached);
+    if (git_capture(path, "merge-base HEAD @{upstream}", &out->base_sha) != 0) {
+        out->base_sha = git_strdup("");
+    }
+
+    if (!out->git_dir || !out->git_common_dir || !out->head_sha || !out->branch ||
+        !out->canonical_root || !out->branch_slug || !out->base_sha) {
+        cbm_git_context_free(out);
+        return CBM_NOT_FOUND;
+    }
+
+    return 0;
+}
+
+/* Return a heap string "<project>.__branch__.<slug>" (caller frees), NULL on
+ * allocation failure. `project_name` defaults to "project"; the slug is
+ * "detached" for a detached HEAD, ctx->branch_slug inside a git work tree, and
+ * "working-tree" otherwise (including a NULL ctx). */
+char *cbm_git_context_branch_qn(const char *project_name, const cbm_git_context_t *ctx) {
+    const char *project = project_name && project_name[0] ? project_name : "project";
+    const char *slug = "working-tree";
+    if (ctx) {
+        if (ctx->is_detached) {
+            slug = "detached";
+        } else if (ctx->is_git && ctx->branch_slug && ctx->branch_slug[0]) {
+            slug = ctx->branch_slug;
+        }
+    }
+
+    int n = snprintf(NULL, 0, "%s.__branch__.%s", project, slug);
+    if (n < 0) {
+        return NULL;
+    }
+    char *out = (char *)malloc((size_t)n + 1);
+    if (!out) {
+        return NULL;
+    }
+    snprintf(out, (size_t)n + 1, "%s.__branch__.%s", project, slug);
+    return out;
+}
+
+/* printf-append at buf + *off, advancing *off only when the whole text fits.
+ * Returns false without advancing on bad arguments, a format error, or truncation. */
+static bool append_fmt_checked(char *buf, int buf_size, int *off, const char *fmt, ...) {
+    if (!buf || !off || buf_size <= 0 || *off < 0 || *off >= buf_size) {
+        return false;
+    }
+
+    va_list ap;
+    va_start(ap, fmt);
+    int n = vsnprintf(buf + *off, (size_t)(buf_size - *off), fmt, ap);
+    va_end(ap);
+    if (n < 0 || n >= buf_size - *off) {
+        buf[buf_size - 1] = '\0';
+        return false;
+    }
+    *off += n;
+    return true;
+}
+
+/* Predict the escaped length of `src`: two bytes for quote, backslash, \n, \r
+ * and \t, none for other control bytes, one otherwise. json_append_string()
+ * requires cbm_json_escape() to produce exactly this length. */
+static int json_escaped_len(const char *src) {
+    if (!src) {
+        return 0;
+    }
+    int len = 0;
+    for (int i = 0; src[i]; i++) {
+        unsigned char c = (unsigned char)src[i];
+        if (c == '"' || c == '\\' || c == '\n' || c == '\r' || c == '\t') {
+            len += 2;
+        } else if (c < 0x20) {
+            continue;
+        } else {
+            len++;
+        }
+    }
+    return len;
+}
+
+/* Append `"name":true|false`, followed by ',' when `comma` is set. */
+static bool json_append_bool(char *buf, int buf_size, int *off, const char *name, bool value,
+                             bool comma) {
+    return append_fmt_checked(buf, buf_size, off, "\"%s\":%s%s", name, value ? "true" : "false",
+                              comma ? "," : "");
+}
+
+/* Append `"name":"<escaped value>"` (NULL value as ""), plus ',' when `comma`
+ * is set. Fails on allocation failure, an escape-length mismatch, or overflow. */
+static bool json_append_string(char *buf, int buf_size, int *off, const char *name,
+                               const char *value, bool comma) {
+    int needed = json_escaped_len(value ? value : "");
+    char *escaped = malloc((size_t)needed + 1);
+    if (!escaped) {
+        return false;
+    }
+    int actual = cbm_json_escape(escaped, needed + 1, value ? value : "");
+    bool ok = actual == needed && append_fmt_checked(buf, buf_size, off, "\"%s\":\"%s\"%s", name,
+                                                     escaped, comma ? "," : "");
+    free(escaped);
+    return ok;
+}
+
+/* Serialize the flags and main string fields of `ctx` as one JSON object into
+ * `buf`. Returns the byte count written (excluding the NUL). Returns 0 on bad
+ * arguments, and 0 with buf[0] cleared when a field fails to fit or escape. */
+int cbm_git_context_props_json(const cbm_git_context_t *ctx, char *buf, int buf_size) {
+    if (!ctx || !buf || buf_size <= 0) {
+        return 0;
+    }
+
+    int off = 0;
+    bool ok =
+        append_fmt_checked(buf, buf_size, &off, "{") &&
+        json_append_bool(buf, buf_size, &off, "is_git", ctx->is_git, true) &&
+        json_append_bool(buf, buf_size, &off, "is_worktree", ctx->is_worktree, true) &&
+        json_append_bool(buf, buf_size, &off, "is_detached", ctx->is_detached, true) &&
+        json_append_bool(buf, buf_size, &off, "root_exists", ctx->root_exists, true) &&
+        json_append_string(buf, buf_size, &off, "canonical_root", ctx->canonical_root, true) &&
+        json_append_string(buf, buf_size, &off, "worktree_root", ctx->worktree_root, true) &&
+        json_append_string(buf, buf_size, &off, "git_common_dir", ctx->git_common_dir, true) &&
+        json_append_string(buf, buf_size, &off, "branch", ctx->branch, true) &&
+        json_append_string(buf, buf_size, &off, "head_sha", ctx->head_sha, true) &&
+        json_append_string(buf, buf_size, &off, "base_sha", ctx->base_sha, false) &&
+        append_fmt_checked(buf, buf_size, &off, "}");
+    if (!ok) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return 0;
+    }
+    return off;
+}
diff --git a/src/git/git_context.h b/src/git/git_context.h
new file mode 100644
index 00000000..876309eb
--- /dev/null
+++ b/src/git/git_context.h
@@ -0,0 +1,27 @@
+#ifndef CBM_GIT_CONTEXT_H
+#define CBM_GIT_CONTEXT_H
+
+#include <stdbool.h>
+
+typedef struct {
+    bool is_git;
+    bool is_worktree;
+    bool is_detached;
+    bool root_exists;
+    char *input_path;
+    char *worktree_root;
+    char *git_dir;
+    char *git_common_dir;
+    char *canonical_root;
+    char *branch;
+    char *branch_slug;
+    char *head_sha;
+    char *base_sha;
+} cbm_git_context_t;
+
+int cbm_git_context_resolve(const char *path, cbm_git_context_t *out);
+void cbm_git_context_free(cbm_git_context_t *ctx);
+char *cbm_git_context_branch_qn(const char *project_name, const cbm_git_context_t *ctx);
+int
cbm_git_context_props_json(const cbm_git_context_t *ctx, char *buf, int buf_size); + +#endif diff --git a/src/mcp/mcp.c b/src/mcp/mcp.c index 8f75a260..22d172f9 100644 --- a/src/mcp/mcp.c +++ b/src/mcp/mcp.c @@ -44,6 +44,7 @@ enum { #include "cypher/cypher.h" #include "pipeline/pipeline.h" #include "pipeline/pass_cross_repo.h" +#include "git/git_context.h" #include "cli/cli.h" #include "watcher/watcher.h" #include "foundation/mem.h" @@ -847,6 +848,37 @@ static int collect_db_project_names(const char *dir_path, char *out, size_t out_ return count; } +static void add_git_context_string(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *key, + const char *value) { + if (value) { + yyjson_mut_obj_add_strcpy(doc, obj, key, value); + } else { + yyjson_mut_obj_add_null(doc, obj, key); + } +} + +static void add_git_context_json(yyjson_mut_doc *doc, yyjson_mut_val *obj, const char *root_path) { + cbm_git_context_t ctx = {0}; + (void)cbm_git_context_resolve(root_path, &ctx); + + yyjson_mut_val *git = yyjson_mut_obj(doc); + yyjson_mut_obj_add_bool(doc, git, "is_git", ctx.is_git); + yyjson_mut_obj_add_bool(doc, git, "is_worktree", ctx.is_worktree); + yyjson_mut_obj_add_bool(doc, git, "is_detached", ctx.is_detached); + yyjson_mut_obj_add_bool(doc, git, "root_exists", ctx.root_exists); + add_git_context_string(doc, git, "worktree_root", ctx.worktree_root); + add_git_context_string(doc, git, "git_dir", ctx.git_dir); + add_git_context_string(doc, git, "git_common_dir", ctx.git_common_dir); + add_git_context_string(doc, git, "canonical_root", ctx.canonical_root); + add_git_context_string(doc, git, "branch", ctx.branch); + add_git_context_string(doc, git, "branch_slug", ctx.branch_slug); + add_git_context_string(doc, git, "head_sha", ctx.head_sha); + add_git_context_string(doc, git, "base_sha", ctx.base_sha); + yyjson_mut_obj_add_val(doc, obj, "git", git); + + cbm_git_context_free(&ctx); +} + /* Build a helpful error listing available projects. Caller must free() result. 
*/ static char *build_project_list_error(const char *reason) { char dir_path[CBM_SZ_1K]; @@ -934,6 +966,7 @@ static void build_project_json_entry(yyjson_mut_doc *doc, yyjson_mut_val *arr, c yyjson_mut_val *p = yyjson_mut_obj(doc); yyjson_mut_obj_add_strcpy(doc, p, "name", project_name); yyjson_mut_obj_add_strcpy(doc, p, "root_path", root_path_buf); + add_git_context_json(doc, p, root_path_buf[0] ? root_path_buf : NULL); yyjson_mut_obj_add_int(doc, p, "nodes", nodes); yyjson_mut_obj_add_int(doc, p, "edges", edges); yyjson_mut_obj_add_int(doc, p, "size_bytes", (int64_t)st->st_size); @@ -1687,6 +1720,15 @@ static char *handle_index_status(cbm_mcp_server_t *srv, const char *args) { yyjson_mut_obj_add_int(doc, root, "nodes", nodes); yyjson_mut_obj_add_int(doc, root, "edges", edges); yyjson_mut_obj_add_str(doc, root, "status", nodes > 0 ? "ready" : "empty"); + cbm_project_t proj_info = {0}; + if (cbm_store_get_project(store, project, &proj_info) == CBM_STORE_OK) { + yyjson_mut_obj_add_strcpy(doc, root, "root_path", + proj_info.root_path ? 
proj_info.root_path : ""); + add_git_context_json(doc, root, proj_info.root_path); + safe_str_free(&proj_info.name); + safe_str_free(&proj_info.indexed_at); + safe_str_free(&proj_info.root_path); + } if (nodes == 0) { yyjson_mut_obj_add_str( doc, root, "hint", diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index 7062eb87..389601dc 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -20,6 +20,7 @@ enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, P #include "pipeline/pass_lsp_cross.h" #include "pipeline/worker_pool.h" #include "graph_buffer/graph_buffer.h" +#include "git/git_context.h" #include "store/store.h" #include "discover/discover.h" #include "discover/userconfig.h" @@ -73,6 +74,8 @@ struct cbm_pipeline { char *repo_path; char *db_path; char *project_name; + cbm_git_context_t git_ctx; + char *branch_qn; cbm_index_mode_t mode; atomic_int cancelled; bool persistence; /* write .codebase-memory/graph.db.zst after indexing */ @@ -132,6 +135,8 @@ cbm_pipeline_t *cbm_pipeline_new(const char *repo_path, const char *db_path, p->repo_path = strdup(repo_path); p->db_path = db_path ? strdup(db_path) : NULL; p->project_name = cbm_project_name_from_path(repo_path); + (void)cbm_git_context_resolve(repo_path, &p->git_ctx); + p->branch_qn = cbm_git_context_branch_qn(p->project_name, &p->git_ctx); p->mode = mode; p->persistence = false; atomic_init(&p->cancelled, 0); @@ -152,6 +157,8 @@ void cbm_pipeline_free(cbm_pipeline_t *p) { free(p->repo_path); free(p->db_path); free(p->project_name); + free(p->branch_qn); + cbm_git_context_free(&p->git_ctx); /* gbuf, store, registry freed during/after run */ /* Defensively free userconfig in case run() was never called or panicked */ if (p->userconfig) { @@ -234,7 +241,7 @@ static void create_folder_chain(cbm_pipeline_t *p, const char *dir, CBMHashTable const char *pqn; char *pqn_heap = NULL; if (pdir[0] == '\0') { - pqn = p->project_name; + pqn = p->branch_qn ? 
p->branch_qn : p->project_name; } else { pqn_heap = cbm_pipeline_fqn_folder(p->project_name, pdir); pqn = pqn_heap; @@ -262,6 +269,22 @@ static int pass_structure(cbm_pipeline_t *p, const cbm_file_info_t *files, int f /* Project node */ cbm_gbuf_upsert_node(p->gbuf, "Project", p->project_name, p->project_name, NULL, 0, 0, "{}"); + const char *branch_qn = p->branch_qn ? p->branch_qn : p->project_name; + const char *branch_name = p->git_ctx.branch ? p->git_ctx.branch : "working-tree"; + char branch_props[CBM_SZ_2K]; + const char *branch_props_json = "{}"; + if (cbm_git_context_props_json(&p->git_ctx, branch_props, sizeof(branch_props)) > 0) { + branch_props_json = branch_props; + } + if (p->branch_qn) { + int64_t branch_id = cbm_gbuf_upsert_node(p->gbuf, "Branch", branch_name, branch_qn, NULL, 0, + 0, branch_props_json); + const cbm_gbuf_node_t *project_node = cbm_gbuf_find_by_qn(p->gbuf, p->project_name); + if (project_node && branch_id > 0) { + cbm_gbuf_insert_edge(p->gbuf, project_node->id, branch_id, "HAS_BRANCH", + branch_props_json); + } + } /* Collect unique directories and create Folder/Package nodes */ CBMHashTable *seen_dirs = cbm_ht_create(CBM_SZ_256); @@ -301,7 +324,7 @@ static int pass_structure(cbm_pipeline_t *p, const cbm_file_info_t *files, int f const char *parent_qn; char *parent_qn_heap = NULL; if (dir[0] == '\0') { - parent_qn = p->project_name; + parent_qn = branch_qn; } else { parent_qn_heap = cbm_pipeline_fqn_folder(p->project_name, dir); parent_qn = parent_qn_heap; diff --git a/tests/test_mcp.c b/tests/test_mcp.c index 04796242..eeee4842 100644 --- a/tests/test_mcp.c +++ b/tests/test_mcp.c @@ -496,6 +496,30 @@ TEST(tool_index_status_no_project) { PASS(); } +TEST(tool_index_status_includes_git_metadata) { + char tmp[256]; + cbm_mcp_server_t *srv = setup_snippet_server(tmp, sizeof(tmp)); + ASSERT_NOT_NULL(srv); + + char *resp = + cbm_mcp_server_handle(srv, "{\"jsonrpc\":\"2.0\",\"id\":16,\"method\":\"tools/call\"," + 
"\"params\":{\"name\":\"index_status\"," + "\"arguments\":{\"project\":\"test-project\"}}}"); + ASSERT_NOT_NULL(resp); + char *inner = extract_text_content(resp); + ASSERT_NOT_NULL(inner); + ASSERT_NOT_NULL(strstr(inner, "\"root_path\"")); + ASSERT_NOT_NULL(strstr(inner, "\"git\"")); + ASSERT_NOT_NULL(strstr(inner, "\"is_git\":false")); + ASSERT_NOT_NULL(strstr(inner, "\"root_exists\":true")); + + free(inner); + free(resp); + cbm_mcp_server_free(srv); + cleanup_snippet_dir(tmp); + PASS(); +} + /* ══════════════════════════════════════════════════════════════════ * TOOL HANDLERS WITH DATA * ══════════════════════════════════════════════════════════════════ */ @@ -1921,6 +1945,7 @@ SUITE(mcp) { RUN_TEST(tool_search_graph_includes_node_properties); RUN_TEST(tool_query_graph_basic); RUN_TEST(tool_index_status_no_project); + RUN_TEST(tool_index_status_includes_git_metadata); /* Tool handlers with validation */ RUN_TEST(tool_trace_call_path_not_found); diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index 0dbe9fb9..bad24c0e 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -10,6 +10,7 @@ #include "pipeline/pipeline.h" #include "pipeline/pipeline_internal.h" #include "store/store.h" +#include "git/git_context.h" #include #include @@ -302,6 +303,93 @@ TEST(pipeline_structure_edges) { PASS(); } +TEST(pipeline_branch_root_structure) { + if (setup_test_repo() != 0) { + SKIP("failed to create temp dir"); + } + + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/test_branch_root.db", g_tmpdir); + + cbm_pipeline_t *p = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + int rc = cbm_pipeline_run(p); + ASSERT_EQ(rc, 0); + + cbm_store_t *s = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(s); + const char *project = cbm_pipeline_project_name(p); + + char branch_qn[1024]; + snprintf(branch_qn, sizeof(branch_qn), "%s.__branch__.working-tree", project); + + cbm_node_t project_node = {0}; + cbm_node_t branch_node = {0}; + 
cbm_node_t root_file_node = {0}; + cbm_node_t root_folder_node = {0}; + rc = cbm_store_find_node_by_qn(s, project, project, &project_node); + ASSERT_EQ(rc, CBM_STORE_OK); + rc = cbm_store_find_node_by_qn(s, project, branch_qn, &branch_node); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_STR_EQ(branch_node.label, "Branch"); + ASSERT_STR_EQ(branch_node.name, "working-tree"); + ASSERT_NOT_NULL(strstr(branch_node.properties_json, "\"is_git\":false")); + char *root_folder_qn = cbm_pipeline_fqn_folder(project, "pkg"); + char *root_file_qn = cbm_pipeline_fqn_compute(project, "main.go", "__file__"); + ASSERT_NOT_NULL(root_folder_qn); + ASSERT_NOT_NULL(root_file_qn); + rc = cbm_store_find_node_by_qn(s, project, root_folder_qn, &root_folder_node); + ASSERT_EQ(rc, CBM_STORE_OK); + rc = cbm_store_find_node_by_qn(s, project, root_file_qn, &root_file_node); + ASSERT_EQ(rc, CBM_STORE_OK); + + cbm_edge_t *has_branch = NULL; + int has_branch_count = 0; + rc = cbm_store_find_edges_by_source_type(s, project_node.id, "HAS_BRANCH", &has_branch, + &has_branch_count); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_EQ(has_branch_count, 1); + ASSERT_EQ(has_branch[0].target_id, branch_node.id); + + cbm_edge_t *project_files = NULL; + int project_file_count = 0; + rc = cbm_store_find_edges_by_source_type(s, project_node.id, "CONTAINS_FILE", &project_files, + &project_file_count); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_EQ(project_file_count, 0); + + cbm_edge_t *branch_files = NULL; + int branch_file_count = 0; + rc = cbm_store_find_edges_by_source_type(s, branch_node.id, "CONTAINS_FILE", &branch_files, + &branch_file_count); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_EQ(branch_file_count, 1); + ASSERT_EQ(branch_files[0].target_id, root_file_node.id); + + cbm_edge_t *branch_folders = NULL; + int branch_folder_count = 0; + rc = cbm_store_find_edges_by_source_type(s, branch_node.id, "CONTAINS_FOLDER", &branch_folders, + &branch_folder_count); + ASSERT_EQ(rc, CBM_STORE_OK); + ASSERT_EQ(branch_folder_count, 1); 
+ ASSERT_EQ(branch_folders[0].target_id, root_folder_node.id); + + cbm_store_free_edges(has_branch, has_branch_count); + cbm_store_free_edges(project_files, project_file_count); + cbm_store_free_edges(branch_files, branch_file_count); + cbm_store_free_edges(branch_folders, branch_folder_count); + cbm_node_free_fields(&project_node); + cbm_node_free_fields(&branch_node); + cbm_node_free_fields(&root_file_node); + cbm_node_free_fields(&root_folder_node); + free(root_folder_qn); + free(root_file_qn); + cbm_store_close(s); + cbm_pipeline_free(p); + teardown_test_repo(); + PASS(); +} + TEST(pipeline_project_name_derived) { if (setup_test_repo() != 0) { SKIP("failed to create temp dir"); @@ -572,7 +660,7 @@ TEST(githistory_coupling_carries_last_co_change) { {files_old, 2, 1700000000LL}, /* oldest a.go/b.go co-change */ {files_other, 2, 1750000000LL}, /* unrelated pair */ {files_mid, 2, 1720000000LL}, - {files_new, 2, 1800000000LL}, /* newest a.go/b.go co-change */ + {files_new, 2, 1800000000LL}, /* newest a.go/b.go co-change */ }; cbm_change_coupling_t results[16]; @@ -581,10 +669,9 @@ TEST(githistory_coupling_carries_last_co_change) { bool found_ab = false; for (int i = 0; i < n; i++) { - bool is_ab = (strcmp(results[i].file_a, "a.go") == 0 && - strcmp(results[i].file_b, "b.go") == 0) || - (strcmp(results[i].file_a, "b.go") == 0 && - strcmp(results[i].file_b, "a.go") == 0); + bool is_ab = + (strcmp(results[i].file_a, "a.go") == 0 && strcmp(results[i].file_b, "b.go") == 0) || + (strcmp(results[i].file_a, "b.go") == 0 && strcmp(results[i].file_b, "a.go") == 0); if (!is_ab) { continue; } @@ -1957,6 +2044,132 @@ TEST(project_name_from_path) { PASS(); } +static const char *test_null_dev(void) { +#ifdef _WIN32 + return "NUL"; +#else + return "/dev/null"; +#endif +} + +static bool git_available(void) { + char cmd[128]; + snprintf(cmd, sizeof(cmd), "git --version >%s 2>&1", test_null_dev()); + int rc = system(cmd); + return rc == 0; +} + +static int run_cmd(const char *cmd) { + 
return system(cmd); +} + +TEST(git_context_non_git_path) { + char *tmp = th_mktempdir("cbm_gitctx_nongit"); + ASSERT_NOT_NULL(tmp); + + cbm_git_context_t ctx = {0}; + ASSERT_EQ(cbm_git_context_resolve(tmp, &ctx), 0); + ASSERT_FALSE(ctx.is_git); + ASSERT_TRUE(ctx.root_exists); + + char *qn = cbm_git_context_branch_qn("proj", &ctx); + ASSERT_NOT_NULL(qn); + ASSERT_STR_EQ(qn, "proj.__branch__.working-tree"); + free(qn); + + char json[1024]; + ASSERT_GT(cbm_git_context_props_json(&ctx, json, sizeof(json)), 0); + ASSERT_NOT_NULL(strstr(json, "\"is_git\":false")); + ASSERT_NOT_NULL(strstr(json, "\"root_exists\":true")); + + char long_value[1200]; + memset(long_value, 'a', sizeof(long_value) - 1); + long_value[sizeof(long_value) - 1] = '\0'; + cbm_git_context_t long_ctx = { + .root_exists = true, + .canonical_root = long_value, + }; + char small_json[64]; + ASSERT_EQ(cbm_git_context_props_json(&long_ctx, small_json, sizeof(small_json)), 0); + + cbm_git_context_free(&ctx); + th_rmtree(tmp); + PASS(); +} + +TEST(git_context_linked_worktree) { + if (!git_available()) { + SKIP("git unavailable"); + } + + char *tmp = th_mktempdir("cbm_gitctx_repo"); + ASSERT_NOT_NULL(tmp); + + char repo[512], wt[512], cmd[2048]; + int n = snprintf(repo, sizeof(repo), "%s/repo with space", tmp); + ASSERT_TRUE(n > 0 && n < (int)sizeof(repo)); + n = snprintf(wt, sizeof(wt), "%s/wt with space", tmp); + ASSERT_TRUE(n > 0 && n < (int)sizeof(wt)); + ASSERT_EQ(th_mkdir_p(repo), 0); + + const char *null_dev = test_null_dev(); + snprintf(cmd, sizeof(cmd), "git -C \"%s\" init >%s 2>&1", repo, null_dev); + ASSERT_EQ(run_cmd(cmd), 0); + snprintf(cmd, sizeof(cmd), "git -C \"%s\" checkout -b main >%s 2>&1", repo, null_dev); + ASSERT_EQ(run_cmd(cmd), 0); + ASSERT_EQ(th_write_file(TH_PATH(repo, "file.txt"), "hello\n"), 0); + snprintf(cmd, sizeof(cmd), "git -C \"%s\" add file.txt >%s 2>&1", repo, null_dev); + ASSERT_EQ(run_cmd(cmd), 0); + snprintf(cmd, sizeof(cmd), + "git -C \"%s\" -c user.name=\"CBM Test\" -c 
user.email=\"cbm@example.invalid\" " + "commit -m \"initial\" >%s 2>&1", + repo, null_dev); + ASSERT_EQ(run_cmd(cmd), 0); + snprintf(cmd, sizeof(cmd), "git -C \"%s\" worktree add -b feature/git-context \"%s\" >%s 2>&1", + repo, wt, null_dev); + ASSERT_EQ(run_cmd(cmd), 0); + + cbm_git_context_t main_ctx = {0}; + cbm_git_context_t wt_ctx = {0}; + ASSERT_EQ(cbm_git_context_resolve(repo, &main_ctx), 0); + ASSERT_EQ(cbm_git_context_resolve(wt, &wt_ctx), 0); + + ASSERT_TRUE(main_ctx.is_git); + ASSERT_FALSE(main_ctx.is_worktree); + ASSERT_TRUE(wt_ctx.is_git); + ASSERT_TRUE(wt_ctx.is_worktree); + ASSERT_STR_EQ(main_ctx.canonical_root, wt_ctx.canonical_root); + ASSERT_NOT_NULL(wt_ctx.branch); + ASSERT_STR_EQ(wt_ctx.branch, "feature/git-context"); + ASSERT_STR_EQ(wt_ctx.branch_slug, "feature-git-context"); + ASSERT_NOT_NULL(wt_ctx.head_sha); + + char *qn = cbm_git_context_branch_qn("proj", &wt_ctx); + ASSERT_NOT_NULL(qn); + ASSERT_STR_EQ(qn, "proj.__branch__.feature-git-context"); + free(qn); + + char json[2048]; + ASSERT_GT(cbm_git_context_props_json(&wt_ctx, json, sizeof(json)), 0); + ASSERT_NOT_NULL(strstr(json, "\"is_git\":true")); + ASSERT_NOT_NULL(strstr(json, "\"is_worktree\":true")); + ASSERT_NOT_NULL(strstr(json, "\"branch\":\"feature/git-context\"")); + + cbm_git_context_free(&main_ctx); + cbm_git_context_free(&wt_ctx); + + snprintf(cmd, sizeof(cmd), "git -C \"%s\" checkout --detach HEAD >%s 2>&1", repo, null_dev); + ASSERT_EQ(run_cmd(cmd), 0); + cbm_git_context_t detached_ctx = {0}; + ASSERT_EQ(cbm_git_context_resolve(repo, &detached_ctx), 0); + ASSERT_TRUE(detached_ctx.is_detached); + ASSERT_STR_EQ(detached_ctx.branch_slug, "detached"); + cbm_git_context_free(&detached_ctx); + + th_rmtree(tmp); + PASS(); +} + TEST(project_name_uniqueness) { /* Port of TestProjectNameUniqueness */ char *a = cbm_project_name_from_path("/tmp/bench/zig/lib/std"); @@ -5382,6 +5595,7 @@ SUITE(pipeline) { /* Integration: structure pass */ RUN_TEST(pipeline_structure_nodes); 
RUN_TEST(pipeline_structure_edges); + RUN_TEST(pipeline_branch_root_structure); RUN_TEST(pipeline_project_name_derived); RUN_TEST(pipeline_fast_mode); /* Definitions pass */ @@ -5427,6 +5641,8 @@ SUITE(pipeline) { RUN_TEST(pipeline_docstring_go_class); /* Project name */ RUN_TEST(project_name_from_path); + RUN_TEST(git_context_non_git_path); + RUN_TEST(git_context_linked_worktree); RUN_TEST(project_name_uniqueness); /* Git diff helpers */ RUN_TEST(gitdiff_parse_range_with_count);