From 044b0dcdb8cbd3b7086b7dbffac8d5eb67d38372 Mon Sep 17 00:00:00 2001 From: lovanshu garg Date: Fri, 22 May 2026 18:04:00 +0530 Subject: [PATCH 1/2] feat(ladybug): added provider for graph --- bun.lock | 70 +++++- packages/config/src/schema-fields.ts | 4 + packages/config/src/schema.ts | 3 + packages/ingest-github/package.json | 1 + packages/ingest-github/src/bootstrap.ts | 1 + packages/ladybug/README.md | 64 ++++++ packages/ladybug/package.json | 22 ++ packages/ladybug/src/README.md | 24 ++ packages/ladybug/src/client.ts | 254 ++++++++++++++++++++++ packages/ladybug/src/fileVersions.ts | 51 +++++ packages/ladybug/src/files.ts | 193 ++++++++++++++++ packages/ladybug/src/flatFolderIndexes.ts | 5 + packages/ladybug/src/folder.ts | 78 +++++++ packages/ladybug/src/index.ts | 27 +++ packages/ladybug/src/indexes.ts | 5 + packages/ladybug/src/knowledge.ts | 146 +++++++++++++ packages/ladybug/src/provider.ts | 58 +++++ packages/ladybug/src/repo.ts | 85 ++++++++ packages/ladybug/tsconfig.json | 8 + packages/server/package.json | 1 + packages/server/src/index.ts | 2 + packages/types/src/config.ts | 1 + tsconfig.json | 3 +- 23 files changed, 1104 insertions(+), 2 deletions(-) create mode 100644 packages/ladybug/README.md create mode 100644 packages/ladybug/package.json create mode 100644 packages/ladybug/src/README.md create mode 100644 packages/ladybug/src/client.ts create mode 100644 packages/ladybug/src/fileVersions.ts create mode 100644 packages/ladybug/src/files.ts create mode 100644 packages/ladybug/src/flatFolderIndexes.ts create mode 100644 packages/ladybug/src/folder.ts create mode 100644 packages/ladybug/src/index.ts create mode 100644 packages/ladybug/src/indexes.ts create mode 100644 packages/ladybug/src/knowledge.ts create mode 100644 packages/ladybug/src/provider.ts create mode 100644 packages/ladybug/src/repo.ts create mode 100644 packages/ladybug/tsconfig.json diff --git a/bun.lock b/bun.lock index 7d36ab2..71e2bf6 100644 --- a/bun.lock +++ b/bun.lock @@ 
-109,6 +109,7 @@ "@bb/errors": "workspace:*", "@bb/graph-core": "workspace:*", "@bb/graph-db": "workspace:*", + "@bb/ladybug": "workspace:*", "@bb/llm": "workspace:*", "@bb/logger": "workspace:*", "@bb/mongo": "workspace:*", @@ -118,6 +119,18 @@ "@bb/types": "workspace:*", }, }, + "packages/ladybug": { + "name": "@bb/ladybug", + "version": "0.0.0", + "dependencies": { + "@bb/config": "workspace:*", + "@bb/errors": "workspace:*", + "@bb/graph-core": "workspace:*", + "@bb/graph-db": "workspace:*", + "@bb/types": "workspace:*", + "@ladybugdb/core": "^0.16.1", + }, + }, "packages/llm": { "name": "@bb/llm", "version": "0.0.0", @@ -216,6 +229,7 @@ "@bb/errors": "workspace:*", "@bb/graph-db": "workspace:*", "@bb/ingest-github": "workspace:*", + "@bb/ladybug": "workspace:*", "@bb/mcp": "workspace:*", "@bb/mongo": "workspace:*", "@bb/neo4j": "workspace:*", @@ -270,6 +284,8 @@ "@bb/ingest-github": ["@bb/ingest-github@workspace:packages/ingest-github"], + "@bb/ladybug": ["@bb/ladybug@workspace:packages/ladybug"], + "@bb/llm": ["@bb/llm@workspace:packages/llm"], "@bb/logger": ["@bb/logger@workspace:packages/logger"], @@ -360,6 +376,20 @@ "@ioredis/commands": ["@ioredis/commands@1.5.1", "", {}, "sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw=="], + "@isaacs/fs-minipass": ["@isaacs/fs-minipass@4.0.1", "", { "dependencies": { "minipass": "^7.0.4" } }, "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w=="], + + "@ladybugdb/core": ["@ladybugdb/core@0.16.1", "", { "dependencies": { "cmake-js": "^8.0.0", "node-addon-api": "^6.0.0" }, "optionalDependencies": { "@ladybugdb/core-darwin-arm64": "0.16.1", "@ladybugdb/core-darwin-x64": "0.16.1", "@ladybugdb/core-linux-arm64": "0.16.1", "@ladybugdb/core-linux-x64": "0.16.1", "@ladybugdb/core-win32-x64": "0.16.1" } }, "sha512-qwuEcR8CVMKb6tNDaHtq7Ux8hT/XbPC0db+vwutX6JxNAejyx7YomHKPSy9XAKURhYK8mezZe3UN8rf+xpHOjQ=="], + + "@ladybugdb/core-darwin-arm64": 
["@ladybugdb/core-darwin-arm64@0.16.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Nl+Cf70rD+HaC9IBHv+oeUwqX9plghXD7PN9tyMzMohRVPvcGEbqWPB6YcdJa8rR7qRqCCbmaNMDen5wg4rY2w=="], + + "@ladybugdb/core-darwin-x64": ["@ladybugdb/core-darwin-x64@0.16.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-4eAjfimAAQRSmDfUUkGrl9OhefxcW1ziA9tl0eljBlGoUseE7dL02+RSqjGohYMcQ+lzuHAq1QWb0XRlMA8YTQ=="], + + "@ladybugdb/core-linux-arm64": ["@ladybugdb/core-linux-arm64@0.16.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-zkctksev+hsPFrNxHHdq4lYK5OWdLhWfRdQzjzkgDyaHayHU6yCL2fgD6uPGQ8TRQ6/2DxMErb4p3FzGW85Ubw=="], + + "@ladybugdb/core-linux-x64": ["@ladybugdb/core-linux-x64@0.16.1", "", { "os": "linux", "cpu": "x64" }, "sha512-5rAb9T5vif8WKhHwhobosu2/aiOwJkWb/ViybvUc5GFKunKl8VI6RmZQVeufT9zUzRktUwrxBrxblCxsnamXJw=="], + + "@ladybugdb/core-win32-x64": ["@ladybugdb/core-win32-x64@0.16.1", "", { "os": "win32", "cpu": "x64" }, "sha512-ShOUTrIuZKQ63J95tcRJxKf1cvg8yi2FSYx9kMTSercc1FdQZPV+zxUN0myMq3MTWOl7xDxsVMmdp/t80O29UQ=="], + "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], "@mongodb-js/saslprep": ["@mongodb-js/saslprep@1.4.9", "", { "dependencies": { "sparse-bitfield": "^3.0.3" } }, "sha512-RXSxsokhAF/4nWys8An8npsqOI33Ex1Hlzqjw2pZOO+GKtMAR2noGnUdsFiGwsaO/xXI+56mtjTmDA3JXJsvmA=="], @@ -490,6 +520,8 @@ "chalk": 
["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="], + "chownr": ["chownr@3.0.0", "", {}, "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g=="], + "cli-boxes": ["cli-boxes@4.0.1", "", {}, "sha512-5IOn+jcCEHEraYolBPs/sT4BxYCe2nHg374OPiItB1O96KZFseS2gthU4twyYzeDcFew4DaUM/xwc5BQf08JJw=="], "cli-cursor": ["cli-cursor@4.0.0", "", { "dependencies": { "restore-cursor": "^4.0.0" } }, "sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg=="], @@ -500,6 +532,8 @@ "cluster-key-slot": ["cluster-key-slot@1.1.2", "", {}, "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA=="], + "cmake-js": ["cmake-js@8.0.0", "", { "dependencies": { "debug": "^4.4.3", "fs-extra": "^11.3.3", "node-api-headers": "^1.8.0", "rc": "1.2.8", "semver": "^7.7.3", "tar": "^7.5.6", "url-join": "^4.0.1", "which": "^6.0.0", "yargs": "^17.7.2" }, "bin": { "cmake-js": "bin/cmake-js" } }, "sha512-YbUP88RDwCvoQkZhRtGURYm9RIpWdtvZuhT87fKNoLjk8kIFIFeARpKfuZQGdwfH99GZpUmqSfcDrK62X7lTgg=="], + "code-excerpt": ["code-excerpt@4.0.0", "", { "dependencies": { "convert-to-spaces": "^2.0.1" } }, "sha512-xxodCmBen3iy2i0WtAK8FlFNrRzjUqjRsMfho58xT/wvZU1YTM3fCnRjcy1gJPMepaRlgm/0e6w8SpWHpn3/cA=="], "color": ["color@5.0.3", "", { "dependencies": { "color-convert": "^3.1.3", "color-string": "^2.1.3" } }, "sha512-ezmVcLR3xAVp8kYOm4GS45ZLLgIE6SPAFoduLr6hTDajwb3KZ2F46gulK3XpcwRFb5KKGCSezCBAY4Dw4HsyXA=="], @@ -546,6 +580,8 @@ "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="], + "deep-extend": ["deep-extend@0.6.0", "", {}, "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA=="], + "deep-is": ["deep-is@0.1.4", "", {}, 
"sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="], "denque": ["denque@2.1.0", "", {}, "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw=="], @@ -650,6 +686,8 @@ "fresh": ["fresh@2.0.0", "", {}, "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A=="], + "fs-extra": ["fs-extra@11.3.5", "", { "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^6.0.1", "universalify": "^2.0.0" } }, "sha512-eKpRKAovdpZtR1WopLHxlBWvAgPny3c4gX1G5Jhwmmw4XJj0ifSD5qB5TOo8hmA0wlRKDAOAhEE1yVPgs6Fgcg=="], + "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], "get-caller-file": ["get-caller-file@2.0.5", "", {}, "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="], @@ -670,6 +708,8 @@ "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], + "graceful-fs": ["graceful-fs@4.2.11", "", {}, "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ=="], + "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], "hasown": ["hasown@2.0.3", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg=="], @@ -696,7 +736,7 @@ "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], - "ini": ["ini@6.0.0", "", {}, "sha512-IBTdIkzZNOpqm7q3dRqJvMaldXjDHWkEDfrwGEQTs5eaQMWV+djAhR+wahyNNMAa+qpbDUhBMVt4ZKNwpPm7xQ=="], + "ini": ["ini@1.3.8", "", {}, "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="], "ink": ["ink@7.0.1", 
"", { "dependencies": { "@alcalzone/ansi-tokenize": "^0.3.0", "ansi-escapes": "^7.3.0", "ansi-styles": "^6.2.3", "auto-bind": "^5.0.1", "chalk": "^5.6.2", "cli-boxes": "^4.0.1", "cli-cursor": "^4.0.0", "cli-truncate": "^6.0.0", "code-excerpt": "^4.0.0", "es-toolkit": "^1.45.1", "indent-string": "^5.0.0", "is-in-ci": "^2.0.0", "patch-console": "^2.0.0", "react-reconciler": "^0.33.0", "scheduler": "^0.27.0", "signal-exit": "^3.0.7", "slice-ansi": "^9.0.0", "stack-utils": "^2.0.6", "string-width": "^8.2.0", "terminal-size": "^4.0.1", "type-fest": "^5.5.0", "widest-line": "^6.0.0", "wrap-ansi": "^10.0.0", "ws": "^8.20.0", "yoga-layout": "~3.2.1" }, "peerDependencies": { "@types/react": ">=19.2.0", "react": ">=19.2.0", "react-devtools-core": ">=6.1.2" }, "optionalPeers": ["@types/react", "react-devtools-core"] }, "sha512-o6LAC268PLawlGVYrXTyaTfke4VtJftEheuwbgkQf7yvSXyWp1nRwBbAyKEkWXFZZsW/la5wrMuNbuBvZK2C1w=="], @@ -746,6 +786,8 @@ "json-stable-stringify-without-jsonify": ["json-stable-stringify-without-jsonify@1.0.1", "", {}, "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw=="], + "jsonfile": ["jsonfile@6.2.1", "", { "dependencies": { "universalify": "^2.0.0" }, "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-zwOTdL3rFQ/lRdBnntKVOX6k5cKJwEc1HdilT71BWEu7J41gXIB2MRp+vxduPSwZJPWBxEzv4yH1wYLJGUHX4Q=="], + "keyv": ["keyv@4.5.4", "", { "dependencies": { "json-buffer": "3.0.1" } }, "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw=="], "kuler": ["kuler@2.0.0", "", {}, "sha512-Xq9nH7KlWZmXAtodXDDRE7vs6DU1gTU8zYDHDiWLSip45Egwq3plLHzPn27NgvzL2r1LMPC1vdqh98sQxtqj4A=="], @@ -804,6 +846,10 @@ "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], + "minipass": ["minipass@7.1.3", "", {}, "sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A=="], + + 
"minizlib": ["minizlib@3.1.0", "", { "dependencies": { "minipass": "^7.1.2" } }, "sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw=="], + "moment": ["moment@2.30.1", "", {}, "sha512-uEmtNhbDOrWPFS+hdjFCBfy9f2YoyzRpwcl+DqpC6taX21FzsTLQVbMV/W7PzNSX6x/bhC1zA3c2UQ5NzH6how=="], "mongodb": ["mongodb@7.2.0", "", { "dependencies": { "@mongodb-js/saslprep": "^1.3.0", "bson": "^7.2.0", "mongodb-connection-string-url": "^7.0.0" }, "peerDependencies": { "@aws-sdk/credential-providers": "^3.806.0", "@mongodb-js/zstd": "^7.0.0", "gcp-metadata": "^7.0.1", "kerberos": "^7.0.0", "mongodb-client-encryption": ">=7.0.0 <7.1.0", "snappy": "^7.3.2", "socks": "^2.8.6" }, "optionalPeers": ["@aws-sdk/credential-providers", "@mongodb-js/zstd", "gcp-metadata", "kerberos", "mongodb-client-encryption", "snappy", "socks"] }, "sha512-F/2+BMZtLVhY30ioZp0dAmZ+IRZMBqI+nrv6t5+9/1AIwCa8sMRC3jBf81lpxMhnZgqq8CoUD503Z1oZWq1/sw=="], @@ -828,6 +874,10 @@ "node-abort-controller": ["node-abort-controller@3.1.1", "", {}, "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="], + "node-addon-api": ["node-addon-api@6.1.0", "", {}, "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA=="], + + "node-api-headers": ["node-api-headers@1.8.0", "", {}, "sha512-jfnmiKWjRAGbdD1yQS28bknFM1tbHC1oucyuMPjmkEs+kpiu76aRs40WlTmBmyEgzDM76ge1DQ7XJ3R5deiVjQ=="], + "node-gyp-build-optional-packages": ["node-gyp-build-optional-packages@5.2.2", "", { "dependencies": { "detect-libc": "^2.0.1" }, "bin": { "node-gyp-build-optional-packages": "bin.js", "node-gyp-build-optional-packages-optional": "optional.js", "node-gyp-build-optional-packages-test": "build-test.js" } }, "sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw=="], "object-assign": ["object-assign@4.1.1", "", {}, 
"sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="], @@ -886,6 +936,8 @@ "raw-body": ["raw-body@3.0.2", "", { "dependencies": { "bytes": "~3.1.2", "http-errors": "~2.0.1", "iconv-lite": "~0.7.0", "unpipe": "~1.0.0" } }, "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA=="], + "rc": ["rc@1.2.8", "", { "dependencies": { "deep-extend": "^0.6.0", "ini": "~1.3.0", "minimist": "^1.2.0", "strip-json-comments": "~2.0.1" }, "bin": { "rc": "./cli.js" } }, "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw=="], + "react": ["react@19.2.5", "", {}, "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA=="], "react-reconciler": ["react-reconciler@0.33.0", "", { "dependencies": { "scheduler": "^0.27.0" }, "peerDependencies": { "react": "^19.2.0" } }, "sha512-KetWRytFv1epdpJc3J4G75I4WrplZE5jOL7Yq0p34+OVOKF4Se7WrdIdVC45XsSSmUTlht2FM/fM1FZb1mfQeA=="], @@ -960,10 +1012,14 @@ "strip-ansi": ["strip-ansi@7.2.0", "", { "dependencies": { "ansi-regex": "^6.2.2" } }, "sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w=="], + "strip-json-comments": ["strip-json-comments@2.0.1", "", {}, "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ=="], + "synckit": ["synckit@0.11.12", "", { "dependencies": { "@pkgr/core": "^0.2.9" } }, "sha512-Bh7QjT8/SuKUIfObSXNHNSK6WHo6J1tHCqJsuaFDP7gP0fkzSfTxI8y85JrppZ0h8l0maIgc2tfuZQ6/t3GtnQ=="], "tagged-tag": ["tagged-tag@1.0.0", "", {}, "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng=="], + "tar": ["tar@7.5.15", "", { "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", "minipass": "^7.1.2", "minizlib": "^3.1.0", "yallist": "^5.0.0" } }, "sha512-dzGK0boVlC4W5QFuQN1EFSl3bIDYsk7Tj40U6eIBnK2k/8ml7TZ5agbI5j5+qnoVcAA+rNtBml8SEiLxZpNqRQ=="], + 
"terminal-size": ["terminal-size@4.0.1", "", {}, "sha512-avMLDQpUI9I5XFrklECw1ZEUPJhqzcwSWsyyI8blhRLT+8N1jLJWLWWYQpB2q2xthq8xDvjZPISVh53T/+CLYQ=="], "text-hex": ["text-hex@1.0.0", "", {}, "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg=="], @@ -996,10 +1052,14 @@ "undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="], + "universalify": ["universalify@2.0.1", "", {}, "sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw=="], + "unpipe": ["unpipe@1.0.0", "", {}, "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ=="], "uri-js": ["uri-js@4.4.1", "", { "dependencies": { "punycode": "^2.1.0" } }, "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg=="], + "url-join": ["url-join@4.0.1", "", {}, "sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA=="], + "util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="], "vary": ["vary@1.1.2", "", {}, "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="], @@ -1028,6 +1088,8 @@ "y18n": ["y18n@5.0.8", "", {}, "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA=="], + "yallist": ["yallist@5.0.0", "", {}, "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw=="], + "yaml": ["yaml@2.8.3", "", { "bin": { "yaml": "bin.mjs" } }, "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg=="], "yargs": ["yargs@17.7.2", "", { "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", "string-width": "^4.2.3", "y18n": "^5.0.5", 
"yargs-parser": "^21.1.1" } }, "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w=="], @@ -1058,6 +1120,10 @@ "cliui/wrap-ansi": ["wrap-ansi@7.0.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q=="], + "cmake-js/which": ["which@6.0.1", "", { "dependencies": { "isexe": "^4.0.0" }, "bin": { "node-which": "bin/which.js" } }, "sha512-oGLe46MIrCRqX7ytPUf66EAYvdeMIZYn3WaocqqKZAxrBpkqHfL/qvTyJ/bTk5+AqHCjXmrv3CEWgy368zhRUg=="], + + "global-directory/ini": ["ini@6.0.0", "", {}, "sha512-IBTdIkzZNOpqm7q3dRqJvMaldXjDHWkEDfrwGEQTs5eaQMWV+djAhR+wahyNNMAa+qpbDUhBMVt4ZKNwpPm7xQ=="], + "import-fresh/resolve-from": ["resolve-from@4.0.0", "", {}, "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="], "ink-text-input/type-fest": ["type-fest@4.41.0", "", {}, "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA=="], @@ -1088,6 +1154,8 @@ "cliui/wrap-ansi/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], + "cmake-js/which/isexe": ["isexe@4.0.0", "", {}, "sha512-FFUtZMpoZ8RqHS3XeXEmHWLA4thH+ZxCv2lOiPIn1Xc7CxrqhWzNSDzD+/chS/zbYezmiwWLdQC09JdQKmthOw=="], + "listr2/cli-truncate/slice-ansi": ["slice-ansi@8.0.0", "", { "dependencies": { "ansi-styles": "^6.2.3", "is-fullwidth-code-point": "^5.1.0" } }, "sha512-stxByr12oeeOyY2BlviTNQlYV5xOj47GirPr4yA1hE9JCtxfQN0+tVbkxwCtYDQWhEKWFHsEK48ORg5jrouCAg=="], "listr2/wrap-ansi/string-width": ["string-width@7.2.0", "", { "dependencies": { "emoji-regex": "^10.3.0", "get-east-asian-width": "^1.0.0", "strip-ansi": "^7.1.0" } }, "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ=="], diff --git 
a/packages/config/src/schema-fields.ts b/packages/config/src/schema-fields.ts index 0f378e4..0ef19e0 100644 --- a/packages/config/src/schema-fields.ts +++ b/packages/config/src/schema-fields.ts @@ -75,6 +75,8 @@ export function readField(cfg: BytebellConfig, key: K): Config return cfg.graph_provider as ConfigValue; case Config.SqlitePath: return cfg.sqlite_path as ConfigValue; + case Config.LadybugPath: + return cfg.ladybug_path as ConfigValue; } } @@ -150,5 +152,7 @@ export function writeField(cfg: BytebellConfig, key: K, value: return { ...cfg, graph_provider: value as string }; case Config.SqlitePath: return { ...cfg, sqlite_path: value as string }; + case Config.LadybugPath: + return { ...cfg, ladybug_path: value as string }; } } diff --git a/packages/config/src/schema.ts b/packages/config/src/schema.ts index d20e96a..a4c9536 100644 --- a/packages/config/src/schema.ts +++ b/packages/config/src/schema.ts @@ -52,6 +52,7 @@ export const configSchema = z db_provider: z.string().default("sqlite"), graph_provider: z.string().default("neo4j"), sqlite_path: z.string().default(""), + ladybug_path: z.string().default(""), }) .strict(); @@ -95,6 +96,7 @@ export type ConfigValueMap = { [Config.DbProvider]: string; [Config.GraphProvider]: string; [Config.SqlitePath]: string; + [Config.LadybugPath]: string; }; export type ConfigValue = ConfigValueMap[K]; @@ -152,6 +154,7 @@ export const HINTS: Readonly> = { [Config.DbProvider]: "bytebell set db-provider ", [Config.GraphProvider]: "bytebell set graph-provider ", [Config.SqlitePath]: "bytebell set sqlite-path ", + [Config.LadybugPath]: "bytebell set ladybug-path ", }; export { readField, writeField } from "./schema-fields.ts"; diff --git a/packages/ingest-github/package.json b/packages/ingest-github/package.json index ecae6fb..2d17d40 100644 --- a/packages/ingest-github/package.json +++ b/packages/ingest-github/package.json @@ -21,6 +21,7 @@ "@bb/mongo": "workspace:*", "@bb/sqlite": "workspace:*", "@bb/neo4j": "workspace:*", + 
"@bb/ladybug": "workspace:*", "@bb/queue": "workspace:*", "@bb/db-core": "workspace:*", "@bb/graph-core": "workspace:*", diff --git a/packages/ingest-github/src/bootstrap.ts b/packages/ingest-github/src/bootstrap.ts index 583d7db..9fef78e 100644 --- a/packages/ingest-github/src/bootstrap.ts +++ b/packages/ingest-github/src/bootstrap.ts @@ -6,6 +6,7 @@ import { connectGraph } from "@bb/graph-db"; import "@bb/mongo"; import "@bb/sqlite"; import "@bb/neo4j"; +import "@bb/ladybug"; export interface BootstrapRuntimeOptions { config: unknown; diff --git a/packages/ladybug/README.md b/packages/ladybug/README.md new file mode 100644 index 0000000..8888df6 --- /dev/null +++ b/packages/ladybug/README.md @@ -0,0 +1,64 @@ +# `@bb/ladybug` — context + +## Tier + +Infrastructure. Depends on Kernel (`@bb/types` for `Config` and `KnowledgeState`, `@bb/errors` for typed error classes) and Sibling (`@bb/config` for `Config.LadybugPath`). +May be imported by Strategy (`@bb/queue` workers via `@bb/ingest-github`), Domain, and Binaries — never by `@bb/cli`. + +This package implements the `@bb/graph-core` `IGraphDatabaseProvider` interface for **LadybugDB**, an embedded property graph OLAP database. + +## Responsibility + +The package owns: + +- A single shared `@ladybugdb/core` `Database` and `Connection` instance (lazy connection; graceful close). +- A health probe (`pingLadybug`). +- An internal `_runCypher(query, params)` helper that compiles and runs a query, caching prepared statements globally. +- Schema bootstrap — creating tables for `Knowledge`, `Repo`, `Folder`, `File`, `FileVersion`, `Keyword`, `Class`, `Function`, and `Module`. +- Knowledge-node CRUD (`upsertKnowledgeNode`, `setKnowledgeStateInGraph`, `deleteKnowledgeGraph`). +- Folder and Repository CRUD (`upsertFolderNode`, `upsertRepoNode`). 
+- Optimized File-node Bulk Upsert (`bulkUpsertFiles`) — maps files to Parquet rows, writes them to temporary files on disk, and executes single-transaction `DELETE` and SQL `COPY FROM` commands. +- File-node Snapshotting (`snapshotFilesToVersion`) — copies live files to snapshots before updates. + +## Public exports + +```ts +function connectLadybug(): Promise; +function closeLadybug(): Promise; +function pingLadybug(): Promise; + +function upsertKnowledgeNode(doc: KnowledgeDoc): Promise; +function setKnowledgeStateInGraph(knowledgeId: string, state: KnowledgeState): Promise; +function deleteKnowledgeGraph(knowledgeId: string): Promise; +function upsertFileNode(input: UpsertFileNodeInput): Promise; +function bulkUpsertFiles(knowledgeId: string, fileStream: AsyncIterable): Promise; +function deleteFileNodes(knowledgeId: string, paths: string[]): Promise; + +function runCypher(query: string, params?: Record): Promise; +``` + +## Graph schema (v1) + +``` +(:Knowledge {knowledgeId, sourceKind, sourceUrl, branch, repoName, state, createdAt, updatedAt}) + -[:HAS_FILE]-> +(:File {id, orgId, knowledgeId, repoId, relativePath, language, sha, sizeBytes, purpose, summary, businessContext, ...}) + -[:HAS_KEYWORD]-> (:Keyword {name}) // global, lowercase, MERGE-deduped + -[:HAS_CLASS]-> (:Class {signature}) // global, MERGE-deduped + -[:HAS_FUNCTION]-> (:Function {signature}) // global, MERGE-deduped + -[:HAS_IMPORT_INTERNAL]-> (:Module {name}) // relative imports (./ or ../) + -[:HAS_IMPORT_EXTERNAL]-> (:Module {name}) // external packages / stdlib +``` + +Uniqueness and primary keys are enforced through computed surrogate IDs (e.g. `${knowledgeId}::${relativePath}`) due to LadybugDB's single-column primary key constraints. + +Polymorphic relationships (such as `CONTAINS` and `HAS_KEYWORD`) are loaded with explicit query routing: + +- `COPY CONTAINS FROM '...' (FROM='Folder', TO='File')` +- `COPY HAS_KEYWORD FROM '...' (FROM='File', TO='Keyword')` + +## Invariants + +1. 
**Memory-Safe Streaming Ingestion**: `bulkUpsertFiles` uses an `AsyncIterable` stream, writing inputs to temporary Parquet files on disk immediately to prevent heap allocation failures for large codebases. +2. **Graceful Connection**: Lazy initialization ensures connection runs once and caches the `Database` and `Connection` handles cleanly. +3. **Parameter Type Casting**: Explicit casts to `LbugValue` ensure type compatibility when executing parameter queries. diff --git a/packages/ladybug/package.json b/packages/ladybug/package.json new file mode 100644 index 0000000..9a5fe5e --- /dev/null +++ b/packages/ladybug/package.json @@ -0,0 +1,22 @@ +{ + "name": "@bb/ladybug", + "version": "0.0.0", + "private": true, + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts" + }, + "imports": { + "#src/*": "./src/*" + }, + "dependencies": { + "@bb/config": "workspace:*", + "@bb/errors": "workspace:*", + "@bb/graph-db": "workspace:*", + "@bb/graph-core": "workspace:*", + "@bb/types": "workspace:*", + "@ladybugdb/core": "^0.16.1" + } +} diff --git a/packages/ladybug/src/README.md b/packages/ladybug/src/README.md new file mode 100644 index 0000000..af2e139 --- /dev/null +++ b/packages/ladybug/src/README.md @@ -0,0 +1,24 @@ +# `@bb/ladybug/src` — context + +Implementation of `@bb/ladybug`. See [../README.md](../README.md) for the package-level contract; this file documents the code structure of the source directory. + +## Files + +- **[index.ts](index.ts)** — Public entrypoint. Re-exports driver controls, entity repositories, and helper types. +- **[client.ts](client.ts)** — Connection lifecycle (`connectLadybug`, `closeLadybug`), schema initialization, global Prepared Statement caching, and parameter query execution. +- **[provider.ts](provider.ts)** — Registers the `"ladybug"` provider and packages the repositories to conform to the `IGraphDatabaseProvider` contract. +- **[files.ts](files.ts)** — Handles files. 
Implements `bulkUpsertFiles` utilizing `parquetjs` writers and SQL `COPY FROM` commands, writing incoming streams directly to disk. +- **[fileVersions.ts](fileVersions.ts)** — Snapshots file records into the `FileVersion` table before updates. +- **[folder.ts](folder.ts)** — Manages folder node upserting. +- **[repo.ts](repo.ts)** — Manages repository node upserting. +- **[knowledge.ts](knowledge.ts)** — Manages knowledge metadata, branch state, and asynchronous sweeping of orphan entity nodes (`vacuumOrphanEntities`). +- **[indexes.ts](indexes.ts)** & **[flatFolderIndexes.ts](flatFolderIndexes.ts)** — No-op files satisfying interface constraints (indexing is natively optimized in LadybugDB). + +## Invariants + +- **PreparedStatement Caching**: All query strings run via `_runCypher` check a global map for a cached `PreparedStatement` instance, avoiding redundant compiling overhead during loops. +- **Surrogate Keys**: Primary keys are computed strictly in TypeScript (e.g. `${knowledgeId}::${relativePath}`) before inserts. +- **Clean Slate**: `bulkUpsertFiles` executes a targeted clean slate delete of `File` nodes matching the `knowledgeId` before loading the new files, keeping transactions atomic. 
+- **Polymorphic Copy Mapping**: Restricts database-side ambiguity by passing explicit parameter routing:
+  - `(FROM='Folder', TO='File')` for `CONTAINS`
+  - `(FROM='File', TO='Keyword')` for `HAS_KEYWORD`
diff --git a/packages/ladybug/src/client.ts b/packages/ladybug/src/client.ts
new file mode 100644
index 0000000..d36e2d2
--- /dev/null
+++ b/packages/ladybug/src/client.ts
@@ -0,0 +1,302 @@
+import { Database, Connection, PreparedStatement, type LbugValue } from "@ladybugdb/core";
+import { getConfigValue } from "@bb/config";
+import { Config } from "@bb/types";
+
+/** Outcome of a {@link pingLadybug} health probe. */
+export interface PingResult {
+  /** `true` when the probe query completed without throwing. */
+  ok: boolean;
+  /** Rounded wall-clock duration of the probe in milliseconds (`0` when not connected). */
+  latencyMs: number;
+}
+
+// Process-wide singletons shared by every caller of this module.
+let db: Database | null = null;
+let conn: Connection | null = null;
+// In-flight connect attempt; lets concurrent callers await the same promise.
+let connecting: Promise<void> | null = null;
+// Prepared statements keyed by query text. They belong to `conn`, so the cache
+// is cleared whenever that connection is closed.
+const preparedCache = new Map<string, PreparedStatement>();
+
+/**
+ * Opens the shared LadybugDB database and connection and bootstraps the schema.
+ *
+ * Resolves immediately when a connection is already published; concurrent
+ * callers share a single in-flight attempt.
+ *
+ * @throws Error when the database cannot be opened or the schema cannot be created.
+ */
+export async function connectLadybug(): Promise<void> {
+  if (conn !== null) {
+    return;
+  }
+  if (connecting !== null) {
+    return connecting;
+  }
+  connecting = doConnect().finally(() => {
+    connecting = null;
+  });
+  return connecting;
+}
+
+/**
+ * Closes both handles, connection first, ignoring close failures so that
+ * cleanup never masks the error (or shutdown) that triggered it.
+ */
+async function releaseHandles(c: Connection | null, d: Database | null): Promise<void> {
+  for (const handle of [c, d]) {
+    if (handle === null) {
+      continue;
+    }
+    try {
+      await handle.close();
+    } catch {
+      // Best-effort cleanup: nothing useful can be done if close() itself fails.
+    }
+  }
+}
+
+/**
+ * Performs the actual connect. The handles are published to the module-level
+ * `db`/`conn` only after the schema exists, so no caller can observe a
+ * half-initialised connection.
+ */
+async function doConnect(): Promise<void> {
+  let dbPath = getConfigValue(Config.LadybugPath);
+  if (dbPath === "") {
+    // No path configured: fall back to the driver's `:memory:` database.
+    dbPath = ":memory:";
+  }
+
+  let openedDb: Database | null = null;
+  let openedConn: Connection | null = null;
+  try {
+    openedDb = new Database(dbPath);
+    openedConn = new Connection(openedDb);
+    await ensureSchema(openedConn);
+    db = openedDb;
+    conn = openedConn;
+  } catch (cause: unknown) {
+    // Release whatever was opened instead of just dropping the references.
+    await releaseHandles(openedConn, openedDb);
+    throw new Error(
+      `Failed to connect to LadybugDB at '${dbPath}': ${cause instanceof Error ? cause.message : String(cause)}`,
+      { cause },
+    );
+  }
+}
+
+/**
+ * Creates every node and relationship table, tolerating tables that already
+ * exist so the bootstrap can run against a previously initialised database.
+ *
+ * @throws the driver error for any DDL failure other than "already exists".
+ */
+async function ensureSchema(c: Connection): Promise<void> {
+  const nodeTables = [
+    `CREATE NODE TABLE Knowledge (
+      knowledgeId STRING PRIMARY KEY,
+      createdAt STRING,
+      sourceKind STRING,
+      sourceUrl STRING,
+      branch STRING,
+      repoName STRING,
+      state STRING,
+      updatedAt STRING
+    )`,
+    `CREATE NODE TABLE Repo (
+      id STRING PRIMARY KEY,
+      orgId STRING,
+      knowledgeId STRING,
+      repoId STRING,
+      repoUrl STRING,
+      branch STRING,
+      purpose STRING,
+      summary STRING,
+      architecture STRING,
+      dataFlow STRING,
+      majorSubsystems STRING[],
+      keyPatterns STRING[],
+      updatedAt STRING
+    )`,
+    `CREATE NODE TABLE Folder (
+      id STRING PRIMARY KEY,
+      orgId STRING,
+      knowledgeId STRING,
+      repoId STRING,
+      folderPath STRING,
+      purpose STRING,
+      summary STRING,
+      dependencyGraph STRING,
+      updatedAt STRING
+    )`,
+    `CREATE NODE TABLE File (
+      id STRING PRIMARY KEY,
+      orgId STRING,
+      knowledgeId STRING,
+      repoId STRING,
+      relativePath STRING,
+      language STRING,
+      sha STRING,
+      sizeBytes INT64,
+      purpose STRING,
+      summary STRING,
+      businessContext STRING,
+      dataFlowDirection STRING,
+      ontologyConcepts STRING[],
+      businessEntities STRING[],
+      systemCapabilities STRING[],
+      sideEffects STRING[],
+      configDependencies STRING[],
+      integrationSurface STRING[],
+      contractsProvided STRING[],
+      contractsConsumed STRING[],
+      sectionNames STRING[],
+      sectionDescriptions STRING[],
+      isBigFile BOOLEAN,
+      totalChunks INT64,
+      totalTokenCount INT64,
+      updatedAt STRING
+    )`,
+    `CREATE NODE TABLE FileVersion (
+      id STRING PRIMARY KEY,
+      knowledgeId STRING,
+      relativePath STRING,
+      commitHash STRING,
+      language STRING,
+      sha STRING,
+      sizeBytes INT64,
+      purpose STRING,
+      summary STRING,
+      businessContext STRING,
+      dataFlowDirection STRING,
+      ontologyConcepts STRING[],
+      businessEntities STRING[],
+      systemCapabilities STRING[],
+      sideEffects STRING[],
+      configDependencies STRING[],
+      integrationSurface STRING[],
+      contractsProvided STRING[],
+      contractsConsumed STRING[],
+      sectionNames STRING[],
+      sectionDescriptions STRING[],
+      snapshotAt STRING
+    )`,
+    `CREATE NODE TABLE Keyword (
+      name STRING PRIMARY KEY
+    )`,
+    `CREATE NODE TABLE Class (
+      signature STRING PRIMARY KEY
+    )`,
+    `CREATE NODE TABLE Function (
+      signature STRING PRIMARY KEY
+    )`,
+    `CREATE NODE TABLE Module (
+      name STRING PRIMARY KEY
+    )`,
+  ];
+
+  const relTables = [
+    `CREATE REL TABLE HAS_REPO (FROM Knowledge TO Repo)`,
+    `CREATE REL TABLE HAS_FILE (FROM Knowledge TO File)`,
+    `CREATE REL TABLE CONTAINS (FROM Repo TO Folder, FROM Folder TO Folder, FROM Folder TO File)`,
+    `CREATE REL TABLE HAS_KEYWORD (FROM File TO Keyword, FROM Folder TO Keyword, FROM Repo TO Keyword)`,
+    `CREATE REL TABLE HAS_CLASS (FROM File TO Class)`,
+    `CREATE REL TABLE HAS_FUNCTION (FROM File TO Function)`,
+    `CREATE REL TABLE HAS_IMPORT_INTERNAL (FROM File TO Module)`,
+    `CREATE REL TABLE HAS_IMPORT_EXTERNAL (FROM File TO Module)`,
+    `CREATE REL TABLE HAS_VERSION (FROM File TO FileVersion)`,
+  ];
+
+  for (const q of [...nodeTables, ...relTables]) {
+    try {
+      await c.query(q);
+    } catch (e: unknown) {
+      const msg = e instanceof Error ? e.message : String(e);
+      // Only a pre-existing table is expected here; anything else (including
+      // other binder errors) is a real schema problem and must surface.
+      if (!msg.includes("already exists")) {
+        throw e;
+      }
+    }
+  }
+}
+
+/**
+ * Closes the shared connection and database and resets module state so a
+ * later {@link connectLadybug} starts from scratch. Safe to call when not connected.
+ */
+export async function closeLadybug(): Promise<void> {
+  const openConn = conn;
+  const openDb = db;
+  // Clear state first so no caller picks up a handle that is being closed.
+  conn = null;
+  db = null;
+  // Cached statements were prepared on the connection being closed.
+  preparedCache.clear();
+  await releaseHandles(openConn, openDb);
+}
+
+/**
+ * Health probe: runs a trivial count query on the shared connection.
+ *
+ * @returns `ok: false` with `latencyMs: 0` when not connected; otherwise the
+ * query outcome and its rounded duration. Never throws.
+ */
+export async function pingLadybug(): Promise<PingResult> {
+  if (conn === null) {
+    return { ok: false, latencyMs: 0 };
+  }
+  const start = performance.now();
+  try {
+    await conn.query("MATCH (k:Knowledge) RETURN count(k) LIMIT 1");
+    return { ok: true, latencyMs: Math.round(performance.now() - start) };
+  } catch {
+    return { ok: false, latencyMs: Math.round(performance.now() - start) };
+  }
+}
+
+/**
+ * Returns the shared connection.
+ *
+ * @throws Error when {@link connectLadybug} has not completed successfully.
+ */
+export function _getConnection(): Connection {
+  if (conn === null) {
+    throw new Error("LadybugDB not connected. Call connectLadybug() first.");
+  }
+  return conn;
+}
+
+export async function _runCypher(query: string, params: Record = {}): Promise {
+  const c = _getConnection();
+
+  // 1. Check if the query has already been compiled and compiled plan is cached
+  let prepared = preparedCache.get(query);
+
+  if (!prepared) {
+    prepared = await c.prepare(query);
+    if (!prepared.isSuccess()) {
+      throw new Error(`Failed to prepare query: ${prepared.getErrorMessage()}`);
+    }
+    // 2. Store it for future iterations in the ingest loop
+    preparedCache.set(query, prepared);
+  }
+
+  const result = await c.execute(prepared, params);
+  const singleResult = Array.isArray(result) ?
result[0] : result; + if (!singleResult) { + throw new Error("No query result returned from LadybugDB"); + } + const rows = await singleResult.getAll(); + return rows as T[]; +} + +// Clear the cache if tests reset +export function __resetForTests(): void { + db = null; + conn = null; + connecting = null; + preparedCache.clear(); // Clear cache here +} diff --git a/packages/ladybug/src/fileVersions.ts b/packages/ladybug/src/fileVersions.ts new file mode 100644 index 0000000..6e531df --- /dev/null +++ b/packages/ladybug/src/fileVersions.ts @@ -0,0 +1,51 @@ +import { _runCypher } from "./client.ts"; + +/** + * Snapshots the current `:File` set for a knowledge into `:FileVersion` nodes + * tagged with `commitHash`. Run **before** the strategy overwrites the `:File` + * nodes during a pull, so the prior commit's state is preserved as a version + * snapshot rather than being lost. + */ +const SNAPSHOT_FILES_TO_VERSION = ` +MATCH (f:File {knowledgeId: $knowledgeId}) +MERGE (fv:FileVersion { + id: $knowledgeId + "::" + f.relativePath + "::" + $commitHash +}) +SET fv.knowledgeId = $knowledgeId, + fv.relativePath = f.relativePath, + fv.commitHash = $commitHash, + fv.language = f.language, + fv.sha = f.sha, + fv.sizeBytes = f.sizeBytes, + fv.purpose = f.purpose, + fv.summary = f.summary, + fv.businessContext = f.businessContext, + fv.dataFlowDirection = f.dataFlowDirection, + fv.ontologyConcepts = f.ontologyConcepts, + fv.businessEntities = f.businessEntities, + fv.systemCapabilities = f.systemCapabilities, + fv.sideEffects = f.sideEffects, + fv.configDependencies = f.configDependencies, + fv.integrationSurface = f.integrationSurface, + fv.contractsProvided = f.contractsProvided, + fv.contractsConsumed = f.contractsConsumed, + fv.sectionNames = f.sectionNames, + fv.sectionDescriptions = f.sectionDescriptions, + fv.snapshotAt = $snapshotAt +MERGE (f)-[:HAS_VERSION]->(fv) +`; + +export interface SnapshotFilesInput { + knowledgeId: string; + /** The commit the current `:File` 
state corresponds to — i.e. the OLD commitId being archived. */ + commitHash: string; +} + +/** Copies every live `:File` into a `:FileVersion(commitHash)` snapshot. */ +export async function snapshotFilesToVersion(input: SnapshotFilesInput): Promise { + await _runCypher(SNAPSHOT_FILES_TO_VERSION, { + knowledgeId: input.knowledgeId, + commitHash: input.commitHash, + snapshotAt: new Date().toISOString(), + }); +} diff --git a/packages/ladybug/src/files.ts b/packages/ladybug/src/files.ts new file mode 100644 index 0000000..685b621 --- /dev/null +++ b/packages/ladybug/src/files.ts @@ -0,0 +1,193 @@ +import type { FileAnalysis } from "@bb/types"; +import { _runCypher } from "./client.ts"; + +const UPSERT_FILE = ` +MERGE (f:File {id: $id}) +SET f.knowledgeId = $knowledgeId, + f.relativePath = $relativePath, + f.orgId = $orgId, + f.repoId = $repoId, + f.language = $language, + f.sha = $sha, + f.sizeBytes = $sizeBytes, + f.purpose = $purpose, + f.summary = $summary, + f.businessContext = $businessContext, + f.dataFlowDirection = $dataFlowDirection, + f.ontologyConcepts = $ontologyConcepts, + f.businessEntities = $businessEntities, + f.systemCapabilities = $systemCapabilities, + f.sideEffects = $sideEffects, + f.configDependencies = $configDependencies, + f.integrationSurface = $integrationSurface, + f.contractsProvided = $contractsProvided, + f.contractsConsumed = $contractsConsumed, + f.sectionNames = $sectionNames, + f.sectionDescriptions = $sectionDescriptions, + f.isBigFile = $isBigFile, + f.totalChunks = $totalChunks, + f.totalTokenCount = $totalTokenCount, + f.updatedAt = $updatedAt +WITH f +MATCH (k:Knowledge {knowledgeId: $knowledgeId}) +MERGE (k)-[:HAS_FILE]->(f) +`; + +const ATTACH_FILE_TO_FOLDER = ` +MATCH (f:File {id: $id}) +MATCH (folder:Folder {id: $folderId}) +MERGE (folder)-[:CONTAINS]->(f) +`; + +const CLEAR_KEYWORDS = ` +MATCH (f:File {id: $id})-[r:HAS_KEYWORD]->() +DELETE r +`; + +const CLEAR_CLASSES = ` +MATCH (f:File {id: $id})-[r:HAS_CLASS]->() 
+DELETE r +`; + +const CLEAR_FUNCTIONS = ` +MATCH (f:File {id: $id})-[r:HAS_FUNCTION]->() +DELETE r +`; + +const CLEAR_IMPORTS_INTERNAL = ` +MATCH (f:File {id: $id})-[r:HAS_IMPORT_INTERNAL]->() +DELETE r +`; + +const CLEAR_IMPORTS_EXTERNAL = ` +MATCH (f:File {id: $id})-[r:HAS_IMPORT_EXTERNAL]->() +DELETE r +`; + +const ATTACH_KEYWORDS = ` +MATCH (f:File {id: $id}) +UNWIND $names AS name +MERGE (kw:Keyword {name: name}) +CREATE (f)-[:HAS_KEYWORD]->(kw) +`; + +const ATTACH_CLASSES = ` +MATCH (f:File {id: $id}) +UNWIND $signatures AS signature +MERGE (c:Class {signature: signature}) +CREATE (f)-[:HAS_CLASS]->(c) +`; + +const ATTACH_FUNCTIONS = ` +MATCH (f:File {id: $id}) +UNWIND $signatures AS signature +MERGE (fn:Function {signature: signature}) +CREATE (f)-[:HAS_FUNCTION]->(fn) +`; + +const ATTACH_IMPORTS_INTERNAL = ` +MATCH (f:File {id: $id}) +UNWIND $names AS name +MERGE (m:Module {name: name}) +CREATE (f)-[:HAS_IMPORT_INTERNAL]->(m) +`; + +const ATTACH_IMPORTS_EXTERNAL = ` +MATCH (f:File {id: $id}) +UNWIND $names AS name +MERGE (m:Module {name: name}) +CREATE (f)-[:HAS_IMPORT_EXTERNAL]->(m) +`; + +export interface UpsertFileNodeInput { + orgId?: string; + knowledgeId: string; + repoId?: string; + relativePath: string; + language: string; + sha: string; + sizeBytes: number; + analysis: FileAnalysis; + folderPath?: string; + isBigFile?: boolean; + totalChunks?: number; + totalTokenCount?: number; +} + +const DELETE_FILES = ` +MATCH (f:File {knowledgeId: $knowledgeId}) +WHERE f.relativePath IN $relativePaths +DETACH DELETE f +`; + +export async function deleteFileNodes(knowledgeId: string, relativePaths: string[]): Promise { + if (relativePaths.length === 0) { + return; + } + await _runCypher(DELETE_FILES, { knowledgeId, relativePaths }); +} + +export async function upsertFileNode(input: UpsertFileNodeInput): Promise { + const orgId = input.orgId ?? "local"; + const repoId = input.repoId ?? 
input.knowledgeId; + const id = `${input.knowledgeId}::${input.relativePath}`; + + const params = { id, knowledgeId: input.knowledgeId }; + const sectionMap = input.analysis.sectionMap ?? []; + + await _runCypher(UPSERT_FILE, { + id, + knowledgeId: input.knowledgeId, + relativePath: input.relativePath, + orgId, + repoId, + language: input.language, + sha: input.sha, + sizeBytes: input.sizeBytes, + purpose: input.analysis.purpose, + summary: input.analysis.summary, + businessContext: input.analysis.businessContext, + dataFlowDirection: input.analysis.dataFlowDirection ?? "", + ontologyConcepts: input.analysis.ontologyConcepts ?? [], + businessEntities: input.analysis.businessEntities ?? [], + systemCapabilities: input.analysis.systemCapabilities ?? [], + sideEffects: input.analysis.sideEffects ?? [], + configDependencies: input.analysis.configDependencies ?? [], + integrationSurface: input.analysis.integrationSurface ?? [], + contractsProvided: input.analysis.contractsProvided ?? [], + contractsConsumed: input.analysis.contractsConsumed ?? [], + sectionNames: sectionMap.map((s) => s.name), + sectionDescriptions: sectionMap.map((s) => s.description), + isBigFile: input.isBigFile ?? false, + totalChunks: input.totalChunks ?? 0, + totalTokenCount: input.totalTokenCount ?? 
0, + updatedAt: new Date().toISOString(), + }); + + if (input.folderPath !== undefined) { + const folderId = `${orgId}::${input.knowledgeId}::${repoId}::${input.folderPath}`; + await _runCypher(ATTACH_FILE_TO_FOLDER, { id, folderId }); + } + + await _runCypher(CLEAR_KEYWORDS, params); + await _runCypher(CLEAR_CLASSES, params); + await _runCypher(CLEAR_FUNCTIONS, params); + await _runCypher(CLEAR_IMPORTS_INTERNAL, params); + await _runCypher(CLEAR_IMPORTS_EXTERNAL, params); + + if (input.analysis.keywords.length > 0) { + await _runCypher(ATTACH_KEYWORDS, { id, names: input.analysis.keywords.map((k) => k.toLowerCase()) }); + } + if (input.analysis.classes.length > 0) { + await _runCypher(ATTACH_CLASSES, { id, signatures: input.analysis.classes }); + } + if (input.analysis.functions.length > 0) { + await _runCypher(ATTACH_FUNCTIONS, { id, signatures: input.analysis.functions }); + } + if (input.analysis.importsInternal.length > 0) { + await _runCypher(ATTACH_IMPORTS_INTERNAL, { id, names: input.analysis.importsInternal }); + } + if (input.analysis.importsExternal.length > 0) { + await _runCypher(ATTACH_IMPORTS_EXTERNAL, { id, names: input.analysis.importsExternal }); + } +} diff --git a/packages/ladybug/src/flatFolderIndexes.ts b/packages/ladybug/src/flatFolderIndexes.ts new file mode 100644 index 0000000..cc809e1 --- /dev/null +++ b/packages/ladybug/src/flatFolderIndexes.ts @@ -0,0 +1,5 @@ +export async function ensureFlatFolderIndexes(): Promise { + // LadybugDB implements uniqueness natively via PRIMARY KEY constraints defined during schema creation. + // Full-text indexes are not supported via Cypher index syntax in LadybugDB; standard MATCH scans are used instead. 
+ return Promise.resolve(); +} diff --git a/packages/ladybug/src/folder.ts b/packages/ladybug/src/folder.ts new file mode 100644 index 0000000..d29a204 --- /dev/null +++ b/packages/ladybug/src/folder.ts @@ -0,0 +1,78 @@ +import { _runCypher } from "./client.ts"; +import type { NodeScope } from "./repo.ts"; + +export interface FolderSummaryPayload { + purpose: string; + summary: string; + keywords: string[]; + classes: string[]; + functions: string[]; + importsInternal: string[]; + importsExternal: string[]; + dependencyGraph: string; +} + +export interface UpsertFolderNodeInput { + scope: NodeScope; + folderPath: string; + summary: FolderSummaryPayload; +} + +const UPSERT_FOLDER = ` +MERGE (folder:Folder {id: $id}) +SET folder.orgId = $orgId, + folder.knowledgeId = $knowledgeId, + folder.repoId = $repoId, + folder.folderPath = $folderPath, + folder.purpose = $purpose, + folder.summary = $summary, + folder.dependencyGraph = $dependencyGraph, + folder.updatedAt = $updatedAt +WITH folder +MATCH (r:Repo {id: $repoId_surrogate}) +MERGE (r)-[:CONTAINS]->(folder) +`; + +const CLEAR_FOLDER_KEYWORDS = ` +MATCH (folder:Folder {id: $id})-[rel:HAS_KEYWORD]->() +DELETE rel +`; + +const ATTACH_FOLDER_KEYWORDS = ` +MATCH (folder:Folder {id: $id}) +UNWIND $names AS name +MERGE (kw:Keyword {name: name}) +CREATE (folder)-[:HAS_KEYWORD]->(kw) +`; + +export async function upsertFolderNode(input: UpsertFolderNodeInput): Promise { + const scope = input.scope; + const id = `${scope.orgId}::${scope.knowledgeId}::${scope.repoId}::${input.folderPath}`; + const repoId_surrogate = `${scope.orgId}::${scope.knowledgeId}::${scope.repoId}`; + + const params = { + id, + orgId: scope.orgId, + knowledgeId: scope.knowledgeId, + repoId: scope.repoId, + folderPath: input.folderPath, + repoId_surrogate, + }; + + await _runCypher(UPSERT_FOLDER, { + ...params, + purpose: input.summary.purpose, + summary: input.summary.summary, + dependencyGraph: input.summary.dependencyGraph, + updatedAt: new 
Date().toISOString(), + }); + + await _runCypher(CLEAR_FOLDER_KEYWORDS, { id }); + + if (input.summary.keywords.length > 0) { + await _runCypher(ATTACH_FOLDER_KEYWORDS, { + id, + names: input.summary.keywords.map((k) => k.toLowerCase()), + }); + } +} diff --git a/packages/ladybug/src/index.ts b/packages/ladybug/src/index.ts new file mode 100644 index 0000000..4014145 --- /dev/null +++ b/packages/ladybug/src/index.ts @@ -0,0 +1,27 @@ +import "./provider.ts"; + +export { connectLadybug, closeLadybug, pingLadybug } from "./client.ts"; +export { _runCypher as runCypher } from "./client.ts"; +export type { PingResult } from "./client.ts"; + +export { ensureKnowledgeIndexes } from "./indexes.ts"; +export { ensureFlatFolderIndexes } from "./flatFolderIndexes.ts"; + +export { + upsertKnowledgeNode, + setKnowledgeStateInGraph, + setKnowledgeBranchInGraph, + deleteKnowledgeGraph, +} from "./knowledge.ts"; + +export { upsertFileNode, deleteFileNodes } from "./files.ts"; +export type { UpsertFileNodeInput } from "./files.ts"; + +export { upsertRepoNode } from "./repo.ts"; +export type { NodeScope, RepoSummaryPayload, UpsertRepoNodeInput } from "./repo.ts"; + +export { upsertFolderNode } from "./folder.ts"; +export type { FolderSummaryPayload, UpsertFolderNodeInput } from "./folder.ts"; + +export { snapshotFilesToVersion } from "./fileVersions.ts"; +export type { SnapshotFilesInput } from "./fileVersions.ts"; diff --git a/packages/ladybug/src/indexes.ts b/packages/ladybug/src/indexes.ts new file mode 100644 index 0000000..3a9930e --- /dev/null +++ b/packages/ladybug/src/indexes.ts @@ -0,0 +1,5 @@ +export async function ensureKnowledgeIndexes(): Promise { + // LadybugDB implements uniqueness natively via PRIMARY KEY constraints defined during schema creation. + // Full-text indexes are not supported via Cypher index syntax in LadybugDB; standard MATCH scans are used instead. 
+ return Promise.resolve(); +} diff --git a/packages/ladybug/src/knowledge.ts b/packages/ladybug/src/knowledge.ts new file mode 100644 index 0000000..2eddd2c --- /dev/null +++ b/packages/ladybug/src/knowledge.ts @@ -0,0 +1,146 @@ +import path from "node:path"; +import type { KnowledgeDoc, KnowledgeState } from "@bb/types"; +import { _runCypher } from "./client.ts"; + +const UPSERT_KNOWLEDGE = ` +MERGE (k:Knowledge {knowledgeId: $knowledgeId}) +ON CREATE SET k.createdAt = $createdAt +SET k.sourceKind = $sourceKind, + k.sourceUrl = $sourceUrl, + k.branch = $branch, + k.repoName = $repoName, + k.state = $state, + k.updatedAt = $updatedAt +`; + +const SET_STATE = ` +MATCH (k:Knowledge {knowledgeId: $knowledgeId}) +SET k.state = $state, k.updatedAt = $updatedAt +`; + +const SET_BRANCH = ` +MATCH (k:Knowledge {knowledgeId: $knowledgeId}) +SET k.branch = $branch, k.updatedAt = $updatedAt +`; + +const DELETE_FILES_BY_KNOWLEDGE = ` +MATCH (f:File {knowledgeId: $knowledgeId}) +DETACH DELETE f +`; + +const DELETE_REPOS_BY_KNOWLEDGE = ` +MATCH (r:Repo {knowledgeId: $knowledgeId}) +DETACH DELETE r +`; + +const DELETE_FOLDERS_BY_KNOWLEDGE = ` +MATCH (folder:Folder {knowledgeId: $knowledgeId}) +DETACH DELETE folder +`; + +const DELETE_KNOWLEDGE_NODE = ` +MATCH (k:Knowledge {knowledgeId: $knowledgeId}) +DETACH DELETE k +`; + +// Defensive cleanup: wipe File nodes whose knowledgeId has no matching :Knowledge. +const DELETE_ORPHAN_FILES = ` +MATCH (f:File) +WHERE NOT EXISTS { MATCH (k:Knowledge {knowledgeId: f.knowledgeId}) } +DETACH DELETE f +`; + +// Sweep orphan entities individually. 
+const DELETE_ORPHAN_KEYWORDS = ` +MATCH (n:Keyword) +WHERE NOT EXISTS { MATCH (:File)-[]->(n) } AND NOT EXISTS { MATCH (:Folder)-[]->(n) } AND NOT EXISTS { MATCH (:Repo)-[]->(n) } +DETACH DELETE n +`; + +const DELETE_ORPHAN_CLASSES = ` +MATCH (n:Class) +WHERE NOT EXISTS { MATCH (:File)-[]->(n) } +DETACH DELETE n +`; + +const DELETE_ORPHAN_FUNCTIONS = ` +MATCH (n:Function) +WHERE NOT EXISTS { MATCH (:File)-[]->(n) } +DETACH DELETE n +`; + +const DELETE_ORPHAN_MODULES = ` +MATCH (n:Module) +WHERE NOT EXISTS { MATCH (:File)-[]->(n) } +DETACH DELETE n +`; + +export async function upsertKnowledgeNode(doc: KnowledgeDoc): Promise { + const sourceKind = doc.source.kind; + const sourceUrl = doc.source.kind === "github" ? (doc.info.repoUrl ?? "") : doc.source.sourcePath; + const branch = doc.source.kind === "github" ? (doc.info.branch ?? null) : null; + await _runCypher(UPSERT_KNOWLEDGE, { + knowledgeId: doc.knowledgeId, + sourceKind, + sourceUrl, + branch, + repoName: deriveRepoName(doc), + state: doc.status.state, + createdAt: doc.createdAt.toISOString(), + updatedAt: doc.updatedAt.toISOString(), + }); +} + +export async function setKnowledgeStateInGraph(knowledgeId: string, state: KnowledgeState): Promise { + await _runCypher(SET_STATE, { + knowledgeId, + state, + updatedAt: new Date().toISOString(), + }); +} + +export async function setKnowledgeBranchInGraph(knowledgeId: string, branch: string): Promise { + await _runCypher(SET_BRANCH, { + knowledgeId, + branch, + updatedAt: new Date().toISOString(), + }); +} + +export async function deleteKnowledgeGraph(knowledgeId: string): Promise { + await _runCypher(DELETE_FILES_BY_KNOWLEDGE, { knowledgeId }); + await _runCypher(DELETE_REPOS_BY_KNOWLEDGE, { knowledgeId }); + await _runCypher(DELETE_FOLDERS_BY_KNOWLEDGE, { knowledgeId }); + await _runCypher(DELETE_ORPHAN_FILES); + await _runCypher(DELETE_KNOWLEDGE_NODE, { knowledgeId }); + await _runCypher(DELETE_ORPHAN_KEYWORDS); + await _runCypher(DELETE_ORPHAN_CLASSES); + await 
_runCypher(DELETE_ORPHAN_FUNCTIONS); + await _runCypher(DELETE_ORPHAN_MODULES); +} + +function deriveRepoName(doc: KnowledgeDoc): string { + if (doc.source.kind === "local") { + return path.basename(doc.source.sourcePath); + } + return repoNameFromGithubUrl(doc.info.repoUrl ?? ""); +} + +function repoNameFromGithubUrl(repoUrl: string): string { + let pathname: string; + try { + pathname = new URL(repoUrl).pathname; + } catch { + pathname = repoUrl; + } + const segments = pathname + .split("/") + .map((segment) => segment.trim()) + .filter((segment) => segment.length > 0); + const repo = segments.at(-1); + const owner = segments.at(-2); + if (owner === undefined || repo === undefined) { + return repoUrl; + } + return `${owner}/${repo.replace(/\.git$/u, "")}`; +} diff --git a/packages/ladybug/src/provider.ts b/packages/ladybug/src/provider.ts new file mode 100644 index 0000000..8dbd7f8 --- /dev/null +++ b/packages/ladybug/src/provider.ts @@ -0,0 +1,58 @@ +import { connectLadybug, closeLadybug, pingLadybug, _runCypher } from "./client.ts"; +import * as knowledgeRepo from "./knowledge.ts"; +import * as filesRepo from "./files.ts"; +import * as fileVersionsRepo from "./fileVersions.ts"; +import * as folderRepo from "./folder.ts"; +import * as repoRepo from "./repo.ts"; +import * as indexRepo from "./indexes.ts"; +import * as flatFolderIndexRepo from "./flatFolderIndexes.ts"; + +import { registerGraphProvider } from "@bb/graph-db"; +import type { IGraphDatabaseProvider } from "@bb/graph-core"; +import type { LbugValue } from "@ladybugdb/core"; + +class LadybugGraphProvider implements IGraphDatabaseProvider { + knowledge = { + upsertKnowledgeNode: knowledgeRepo.upsertKnowledgeNode, + setKnowledgeStateInGraph: knowledgeRepo.setKnowledgeStateInGraph, + setKnowledgeBranchInGraph: knowledgeRepo.setKnowledgeBranchInGraph, + deleteKnowledgeGraph: knowledgeRepo.deleteKnowledgeGraph, + }; + + files = { + upsertFileNode: filesRepo.upsertFileNode, + deleteFileNodes: 
filesRepo.deleteFileNodes, + snapshotFilesToVersion: fileVersionsRepo.snapshotFilesToVersion, + }; + + folders = { + upsertFolderNode: folderRepo.upsertFolderNode, + }; + + repo = { + upsertRepoNode: repoRepo.upsertRepoNode, + }; + + indexes = { + ensureKnowledgeIndexes: indexRepo.ensureKnowledgeIndexes, + ensureFlatFolderIndexes: flatFolderIndexRepo.ensureFlatFolderIndexes, + }; + + async connect(): Promise { + await connectLadybug(); + } + + async close(): Promise { + await closeLadybug(); + } + + async ping() { + return pingLadybug(); + } + + async runCypher(query: string, params?: Record): Promise { + return _runCypher(query, params as Record); + } +} + +registerGraphProvider("ladybug", () => new LadybugGraphProvider()); diff --git a/packages/ladybug/src/repo.ts b/packages/ladybug/src/repo.ts new file mode 100644 index 0000000..e12f5da --- /dev/null +++ b/packages/ladybug/src/repo.ts @@ -0,0 +1,85 @@ +import { _runCypher } from "./client.ts"; + +export interface NodeScope { + orgId: string; + knowledgeId: string; + repoId: string; +} + +export interface RepoSummaryPayload { + purpose: string; + summary: string; + keywords: string[]; + architecture: string; + dataFlow: string; + majorSubsystems: string[]; + keyPatterns: string[]; +} + +export interface UpsertRepoNodeInput { + scope: NodeScope; + repoUrl: string; + branch: string; + summary: RepoSummaryPayload; +} + +const UPSERT_REPO = ` +MERGE (r:Repo {id: $id}) +SET r.orgId = $orgId, + r.knowledgeId = $knowledgeId, + r.repoId = $repoId, + r.repoUrl = $repoUrl, + r.branch = $branch, + r.purpose = $purpose, + r.summary = $summary, + r.architecture = $architecture, + r.dataFlow = $dataFlow, + r.majorSubsystems = $majorSubsystems, + r.keyPatterns = $keyPatterns, + r.updatedAt = $updatedAt +WITH r +MATCH (k:Knowledge {knowledgeId: $knowledgeId}) +MERGE (k)-[:HAS_REPO]->(r) +`; + +const CLEAR_REPO_KEYWORDS = ` +MATCH (r:Repo {id: $id})-[rel:HAS_KEYWORD]->() +DELETE rel +`; + +const ATTACH_REPO_KEYWORDS = ` +MATCH 
(r:Repo {id: $id}) +UNWIND $names AS name +MERGE (kw:Keyword {name: name}) +CREATE (r)-[:HAS_KEYWORD]->(kw) +`; + +export async function upsertRepoNode(input: UpsertRepoNodeInput): Promise { + const scope = input.scope; + const id = `${scope.orgId}::${scope.knowledgeId}::${scope.repoId}`; + + await _runCypher(UPSERT_REPO, { + id, + orgId: scope.orgId, + knowledgeId: scope.knowledgeId, + repoId: scope.repoId, + repoUrl: input.repoUrl, + branch: input.branch, + purpose: input.summary.purpose, + summary: input.summary.summary, + architecture: input.summary.architecture, + dataFlow: input.summary.dataFlow, + majorSubsystems: input.summary.majorSubsystems, + keyPatterns: input.summary.keyPatterns, + updatedAt: new Date().toISOString(), + }); + + await _runCypher(CLEAR_REPO_KEYWORDS, { id }); + + if (input.summary.keywords.length > 0) { + await _runCypher(ATTACH_REPO_KEYWORDS, { + id, + names: input.summary.keywords.map((k) => k.toLowerCase()), + }); + } +} diff --git a/packages/ladybug/tsconfig.json b/packages/ladybug/tsconfig.json new file mode 100644 index 0000000..79adbd3 --- /dev/null +++ b/packages/ladybug/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "rootDir": "src", + "outDir": "dist" + }, + "include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.json"] +} diff --git a/packages/server/package.json b/packages/server/package.json index 08b77e0..d19618f 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -24,6 +24,7 @@ "@bb/mongo": "workspace:*", "@bb/sqlite": "workspace:*", "@bb/neo4j": "workspace:*", + "@bb/ladybug": "workspace:*", "@bb/queue": "workspace:*", "@bb/redis": "workspace:*", "@bb/types": "workspace:*", diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 9bdcc54..e956d6f 100755 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -11,6 +11,8 @@ import { connectQueue } from "@bb/queue"; import "@bb/mongo"; import "@bb/sqlite"; 
import "@bb/neo4j"; +import "@bb/ladybug"; + import { registerGithubWorkers, registerLocalIngestWorker } from "@bb/ingest-github"; import { ServerConfigError } from "@bb/errors"; import { registerRoutes } from "./routes.ts"; diff --git a/packages/types/src/config.ts b/packages/types/src/config.ts index 5ed037d..e526bbb 100644 --- a/packages/types/src/config.ts +++ b/packages/types/src/config.ts @@ -34,6 +34,7 @@ export enum Config { DbProvider = "db_provider", GraphProvider = "graph_provider", SqlitePath = "sqlite_path", + LadybugPath = "ladybug_path", } export enum DbProviderType { diff --git a/tsconfig.json b/tsconfig.json index 4a994b3..6182800 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -27,6 +27,7 @@ { "path": "packages/db-core" }, { "path": "packages/graph-core" }, { "path": "packages/db" }, - { "path": "packages/graph-db" } + { "path": "packages/graph-db" }, + { "path": "packages/ladybug" } ] } From 4ac442632a8a6f83b511b4a4a7dc582b052e298a Mon Sep 17 00:00:00 2001 From: lovanshu garg Date: Fri, 22 May 2026 18:41:09 +0530 Subject: [PATCH 2/2] feat(ladybug): added file streaming optimizing queries, bulk upsert with streaming --- bun.lock | 34 +- infra/docker/docker-compose.yml | 13 + packages/graph-core/src/index.ts | 1 + packages/graph-db/src/index.ts | 9 + .../flat-folder/phases/store-flat-analysis.ts | 19 +- .../src/strategies/flat-folder/store-pull.ts | 96 ++-- packages/ladybug/package.json | 8 +- packages/ladybug/src/fileSchemas.ts | 51 +++ packages/ladybug/src/fileVersions.ts | 48 +- packages/ladybug/src/files.ts | 422 +++++++++++------- packages/ladybug/src/index.ts | 5 +- packages/ladybug/src/knowledge.ts | 3 + packages/ladybug/src/provider.ts | 3 + 13 files changed, 462 insertions(+), 250 deletions(-) create mode 100644 packages/ladybug/src/fileSchemas.ts diff --git a/bun.lock b/bun.lock index 71e2bf6..ae68ee8 100644 --- a/bun.lock +++ b/bun.lock @@ -129,6 +129,10 @@ "@bb/graph-db": "workspace:*", "@bb/types": "workspace:*", "@ladybugdb/core": 
"^0.16.1", + "parquetjs": "^0.11.2", + }, + "devDependencies": { + "@types/parquetjs": "^0.10.6", }, }, "packages/llm": { @@ -434,6 +438,10 @@ "@types/node": ["@types/node@25.6.0", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ=="], + "@types/node-int64": ["@types/node-int64@0.4.32", "", { "dependencies": { "@types/node": "*" } }, "sha512-xf/JsSlnXQ+mzvc0IpXemcrO4BrCfpgNpMco+GLcXkFk01k/gW9lGJu+Vof0ZSvHK6DsHJDPSbjFPs36QkWXqw=="], + + "@types/parquetjs": ["@types/parquetjs@0.10.6", "", { "dependencies": { "@types/node-int64": "*" } }, "sha512-ZCsD6j97YD0mGU8/VnVs3NjORXa7zeHvqlpJpCqy4jU8a1O21dalL+MFn9QNbdEfy8rszR1N7NHeT7/LdtHf+A=="], + "@types/qs": ["@types/qs@6.15.0", "", {}, "sha512-JawvT8iBVWpzTrz3EGw9BTQFg3BQNmwERdKE22vlTxawwtbyUSlMppvZYKLZzB5zgACXdXxbD3m1bXaMqP/9ow=="], "@types/range-parser": ["@types/range-parser@1.2.7", "", {}, "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ=="], @@ -498,11 +506,15 @@ "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + "bindings": ["bindings@1.2.1", "", {}, "sha512-u4cBQNepWxYA55FunZSM7wMi55yQaN0otnhhilNoWHq0MfOfJeQx0v0mRRpolGOExPjZcl6FtB0BB8Xkb88F0g=="], + "body-parser": ["body-parser@2.2.2", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.1", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA=="], "brace-expansion": ["brace-expansion@5.0.5", "", { "dependencies": { "balanced-match": "^4.0.2" } }, "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ=="], - "bson": ["bson@7.2.0", "", {}, 
"sha512-YCEo7KjMlbNlyHhz7zAZNDpIpQbd+wOEHJYezv0nMYTn4x31eIUM2yomNNubclAt63dObUzKHWsBLJ9QcZNSnQ=="], + "brotli": ["brotli@1.3.3", "", { "dependencies": { "base64-js": "^1.1.2" } }, "sha512-oTKjJdShmDuGW94SyyaoQvAjf30dZaHnjJ8uAF+u2/vGJkJbJPJAT1gDiOJP5v1Zb6f9KEyW/1HpuaWIXtGHPg=="], + + "bson": ["bson@1.1.6", "", {}, "sha512-EvVNVeGo4tHxwi8L6bPj3y3itEvStdwvvlojVxxbyYfoaxJ6keLgrTuKdyfEAszFK+H3olzBuafE0yoh0D1gdg=="], "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="], @@ -742,6 +754,8 @@ "ink-text-input": ["ink-text-input@6.0.0", "", { "dependencies": { "chalk": "^5.3.0", "type-fest": "^4.18.2" }, "peerDependencies": { "ink": ">=5", "react": ">=18" } }, "sha512-Fw64n7Yha5deb1rHY137zHTAbSTNelUKuB5Kkk2HACXEtwIHBCf9OH2tP/LQ9fRYTl1F0dZgbW0zPnZk6FA9Lw=="], + "int53": ["int53@0.2.4", "", {}, "sha512-a5jlKftS7HUOhkUyYD7j2sJ/ZnvWiNlZS1ldR+g1ifQ+/UuZXIE+YTc/lK1qGj/GwAU5F8Z0e1eVq2t1J5Ob2g=="], + "ioredis": ["ioredis@5.10.1", "", { "dependencies": { "@ioredis/commands": "1.5.1", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-HuEDBTI70aYdx1v6U97SbNx9F1+svQKBDo30o0b9fw055LMepzpOOd0Ccg9Q6tbqmBSJaMuY0fB7yw9/vjBYCA=="], "ip-address": ["ip-address@10.1.0", "", {}, "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q=="], @@ -824,6 +838,8 @@ "luxon": ["luxon@3.7.2", "", {}, "sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew=="], + "lzo": ["lzo@0.4.11", "", { "dependencies": { "bindings": "~1.2.1" } }, "sha512-apQHNoW2Alg72FMqaC/7pn03I7umdgSVFt2KRkCXXils4Z9u3QBh1uOtl2O5WmZIDLd9g6Lu4lIdOLmiSTFVCQ=="], + "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, 
"sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], "media-typer": ["media-typer@1.1.0", "", {}, "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw=="], @@ -880,12 +896,16 @@ "node-gyp-build-optional-packages": ["node-gyp-build-optional-packages@5.2.2", "", { "dependencies": { "detect-libc": "^2.0.1" }, "bin": { "node-gyp-build-optional-packages": "bin.js", "node-gyp-build-optional-packages-optional": "optional.js", "node-gyp-build-optional-packages-test": "build-test.js" } }, "sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw=="], + "node-int64": ["node-int64@0.4.0", "", {}, "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw=="], + "object-assign": ["object-assign@4.1.1", "", {}, "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg=="], "object-hash": ["object-hash@3.0.0", "", {}, "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw=="], "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], + "object-stream": ["object-stream@0.0.1", "", {}, "sha512-+NPJnRvX9RDMRY9mOWOo/NDppBjbZhXirNNSu2IBnuNboClC9h1ZGHXgHBLDbJMHsxeJDq922aVmG5xs24a/cA=="], + "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], "once": ["once@1.4.0", "", { "dependencies": { "wrappy": "1" } }, "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w=="], @@ -902,6 +922,8 @@ "parent-module": ["parent-module@1.0.1", "", { "dependencies": { "callsites": "^3.0.0" } }, "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g=="], + "parquetjs": 
["parquetjs@0.11.2", "", { "dependencies": { "brotli": "^1.3.0", "bson": "^1.0.4", "int53": "^0.2.4", "object-stream": "0.0.1", "snappyjs": "^0.6.0", "thrift": "^0.11.0", "varint": "^5.0.0" }, "optionalDependencies": { "lzo": "^0.4.0" } }, "sha512-Y6FOc3Oi2AxY4TzJPz7fhICCR8tQNL3p+2xGQoUAMbmlJBR7+JJmMrwuyMjIpDiM7G8Wj/8oqOH4UDUmu4I5ZA=="], + "parse-json": ["parse-json@5.2.0", "", { "dependencies": { "@babel/code-frame": "^7.0.0", "error-ex": "^1.3.1", "json-parse-even-better-errors": "^2.3.0", "lines-and-columns": "^1.1.6" } }, "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg=="], "parseurl": ["parseurl@1.3.3", "", {}, "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="], @@ -930,6 +952,8 @@ "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], + "q": ["q@1.5.1", "", {}, "sha512-kV/CThkXo6xyFEZUugw/+pIOywXcDbFYgSct5cT3gqlbkBE1SJdwy6UQoZvodiWF/ckQLZyDE/Bu1M6gVu5lVw=="], + "qs": ["qs@6.15.1", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg=="], "range-parser": ["range-parser@1.2.1", "", {}, "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="], @@ -994,6 +1018,8 @@ "slice-ansi": ["slice-ansi@9.0.0", "", { "dependencies": { "ansi-styles": "^6.2.3", "is-fullwidth-code-point": "^5.1.0" } }, "sha512-SO/3iYL5S3W57LLEniscOGPZgOqZUPCx6d3dB+52B80yJ0XstzsC/eV8gnA4tM3MHDrKz+OCFSLNjswdSC+/bA=="], + "snappyjs": ["snappyjs@0.6.1", "", {}, "sha512-YIK6I2lsH072UE0aOFxxY1dPDCS43I5ktqHpeAsuLNYWkE5pGxRGWfDM4/vSUfNzXjC1Ivzt3qx31PCLmc9yqg=="], + "sparse-bitfield": ["sparse-bitfield@3.0.3", "", { "dependencies": { "memory-pager": "^1.0.2" } }, "sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ=="], "stack-trace": 
["stack-trace@0.0.10", "", {}, "sha512-KGzahc7puUKkzyMt+IqAep+TVNbKP+k2Lmwhub39m1AsTSkaDutx56aDCo+HLDzf/D26BIHTJWNiTG1KAJiQCg=="], @@ -1024,6 +1050,8 @@ "text-hex": ["text-hex@1.0.0", "", {}, "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg=="], + "thrift": ["thrift@0.11.0", "", { "dependencies": { "node-int64": "^0.4.0", "q": "^1.5.0", "ws": ">= 2.2.3" } }, "sha512-UpsBhOC45a45TpeHOXE4wwYwL8uD2apbHTbtBvkwtUU4dNwCjC7DpQTjw2Q6eIdfNtw+dKthdwq94uLXTJPfFw=="], + "tiktoken": ["tiktoken@1.0.22", "", {}, "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA=="], "tinyexec": ["tinyexec@1.1.1", "", {}, "sha512-VKS/ZaQhhkKFMANmAOhhXVoIfBXblQxGX1myCQ2faQrfmobMftXeJPcZGp0gS07ocvGJWDLZGyOZDadDBqYIJg=="], @@ -1062,6 +1090,8 @@ "util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="], + "varint": ["varint@5.0.2", "", {}, "sha512-lKxKYG6H03yCZUpAGOPOsMcGxd1RHCu1iKvEHYDPmTyq2HueGhD73ssNBqqQWfvYs04G9iUFRvmAVLW20Jw6ow=="], + "vary": ["vary@1.1.2", "", {}, "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="], "webidl-conversions": ["webidl-conversions@7.0.0", "", {}, "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g=="], @@ -1138,6 +1168,8 @@ "log-update/wrap-ansi": ["wrap-ansi@9.0.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "string-width": "^7.0.0", "strip-ansi": "^7.1.0" } }, "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww=="], + "mongodb/bson": ["bson@7.2.0", "", {}, "sha512-YCEo7KjMlbNlyHhz7zAZNDpIpQbd+wOEHJYezv0nMYTn4x31eIUM2yomNNubclAt63dObUzKHWsBLJ9QcZNSnQ=="], + "stack-utils/escape-string-regexp": ["escape-string-regexp@2.0.0", "", {}, "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="], "yargs/string-width": 
["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], diff --git a/infra/docker/docker-compose.yml b/infra/docker/docker-compose.yml index 3fdc22d..101a641 100644 --- a/infra/docker/docker-compose.yml +++ b/infra/docker/docker-compose.yml @@ -60,6 +60,19 @@ services: retries: 12 start_period: 5s + ladybug-explorer: + image: ghcr.io/ladybugdb/explorer:latest + container_name: bytebell-ladybug-explorer + restart: unless-stopped + ports: + - "127.0.0.1:8000:8000" + volumes: + - /Users/zeta/.bytebell:/database + environment: + - LBUG_FILE=ladybug.lbug + networks: + - bytebell + networks: bytebell: name: bytebell diff --git a/packages/graph-core/src/index.ts b/packages/graph-core/src/index.ts index 2319ab2..c212e0e 100644 --- a/packages/graph-core/src/index.ts +++ b/packages/graph-core/src/index.ts @@ -33,6 +33,7 @@ export interface IGraphFileRepository { upsertFileNode(input: UpsertFileNodeInput): Promise; deleteFileNodes(knowledgeId: string, paths: string[]): Promise; snapshotFilesToVersion(input: SnapshotFilesInput): Promise; + bulkUpsertFiles?(knowledgeId: string, fileStream: AsyncIterable): Promise; } export interface IGraphFolderRepository { diff --git a/packages/graph-db/src/index.ts b/packages/graph-db/src/index.ts index 6110ed8..26e967c 100644 --- a/packages/graph-db/src/index.ts +++ b/packages/graph-db/src/index.ts @@ -49,6 +49,15 @@ export const filesGraph: IGraphFileRepository = { upsertFileNode: (...args) => getGraph().files.upsertFileNode(...args), deleteFileNodes: (...args) => getGraph().files.deleteFileNodes(...args), snapshotFilesToVersion: (...args) => getGraph().files.snapshotFilesToVersion(...args), + bulkUpsertFiles: async (knowledgeId, fileStream) => { + const f = getGraph().files; + if (f.bulkUpsertFiles) { + return f.bulkUpsertFiles(knowledgeId, fileStream); + } + 
for await (const input of fileStream) { + await f.upsertFileNode(input); + } + }, }; export const foldersGraph: IGraphFolderRepository = { diff --git a/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts b/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts index 6e51d29..1ca930c 100644 --- a/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts +++ b/packages/ingest-github/src/strategies/flat-folder/phases/store-flat-analysis.ts @@ -2,7 +2,7 @@ import { readFile } from "node:fs/promises"; import { logger } from "@bb/logger"; import { filesGraph, foldersGraph, repoGraph, indexesGraph } from "@bb/graph-db"; import type { GithubIndexPayload } from "@bb/types"; -import type { NodeScope } from "@bb/graph-core"; +import type { NodeScope, UpsertFileNodeInput } from "@bb/graph-core"; import type { MetaPaths } from "#src/types/meta-paths.ts"; import { throwIfCancelled } from "#src/pipeline/cancellation.ts"; import { iterateCondensed } from "#src/strategies/flat-folder/big-file/storage.ts"; @@ -93,7 +93,7 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< total: { kind: "growing" }, }); await fileReporter?.start(); - try { + async function* yieldFiles() { for await (const file of iterateCondensed(input.metaPaths)) { throwIfCancelled(input.scope.knowledgeId); fileReporter?.incrementSeen(); @@ -108,7 +108,7 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< foldersWritten += 1; nodesWritten += 1; } - await filesGraph.upsertFileNode({ + const upsertInput: UpsertFileNodeInput = { orgId: input.scope.orgId, knowledgeId: input.scope.knowledgeId, repoId: input.scope.repoId, @@ -121,11 +121,22 @@ export async function storeFlatAnalysis(input: StoreFlatAnalysisInput): Promise< isBigFile: file.isBigFile, totalChunks: file.totalChunks, totalTokenCount: file.totalTokenCount, - }); + }; filesWritten += 1; nodesWritten += 1; + yield 
upsertInput; fileReporter?.increment(1, { fileName: file.relativePath }); } + } + + try { + if (typeof filesGraph.bulkUpsertFiles === "function") { + await filesGraph.bulkUpsertFiles(input.scope.knowledgeId, yieldFiles()); + } else { + for await (const f of yieldFiles()) { + await filesGraph.upsertFileNode(f); + } + } } finally { fileReporter?.stop(); } diff --git a/packages/ingest-github/src/strategies/flat-folder/store-pull.ts b/packages/ingest-github/src/strategies/flat-folder/store-pull.ts index 6b2d23e..a435538 100644 --- a/packages/ingest-github/src/strategies/flat-folder/store-pull.ts +++ b/packages/ingest-github/src/strategies/flat-folder/store-pull.ts @@ -3,9 +3,8 @@ import { logger } from "@bb/logger"; import { filesGraph, foldersGraph, repoGraph, indexesGraph } from "@bb/graph-db"; import { rawDb } from "@bb/db"; import type { GithubIndexPayload } from "@bb/types"; -import type { NodeScope } from "@bb/graph-core"; +import type { NodeScope, UpsertFileNodeInput } from "@bb/graph-core"; import type { MetaPaths } from "#src/types/meta-paths.ts"; -import type { CondensedFileAnalysis } from "#src/types/condensed-file-analysis.ts"; import { throwIfCancelled } from "#src/pipeline/cancellation.ts"; import type { DiffResult } from "#src/pipeline/git-diff.ts"; import { readCondensed } from "#src/strategies/flat-folder/big-file/storage.ts"; @@ -99,33 +98,57 @@ export async function storePullAnalysis(input: StorePullInput): Promise r.newPath), ]; - const seen = new Set(); - for (const relativePath of upsertPaths) { - if (seen.has(relativePath)) { - continue; - } - seen.add(relativePath); - throwIfCancelled(input.scope.knowledgeId); - - const condensed = await readCondensed(input.metaPaths, relativePath); - if (condensed === null) { - logger.warn(`pull-store: condensed analysis missing for ${relativePath}; skipping file upsert`); - continue; - } - - const folderPath = directFolderOf(relativePath); - if (!folderPaths.has(folderPath)) { - await 
foldersGraph.upsertFolderNode({ - scope: input.scope, + async function* yieldFiles() { + const seen = new Set(); + for (const relativePath of upsertPaths) { + if (seen.has(relativePath)) { + continue; + } + seen.add(relativePath); + throwIfCancelled(input.scope.knowledgeId); + + const condensed = await readCondensed(input.metaPaths, relativePath); + if (condensed === null) { + logger.warn(`pull-store: condensed analysis missing for ${relativePath}; skipping file upsert`); + continue; + } + + const folderPath = directFolderOf(relativePath); + if (!folderPaths.has(folderPath)) { + await foldersGraph.upsertFolderNode({ + scope: input.scope, + folderPath, + summary: emptyFolderPayload(), + }); + folderPaths.add(folderPath); + foldersUpserted += 1; + } + + const upsertInput: UpsertFileNodeInput = { + orgId: input.scope.orgId, + knowledgeId: input.scope.knowledgeId, + repoId: input.scope.repoId, + relativePath: condensed.relativePath, folderPath, - summary: emptyFolderPayload(), - }); - folderPaths.add(folderPath); - foldersUpserted += 1; + language: condensed.language.length > 0 ? 
condensed.language : languageFromPath(condensed.relativePath), + sha: condensed.sha256, + sizeBytes: condensed.sizeBytes, + analysis: condensed.analysis, + isBigFile: condensed.isBigFile, + totalChunks: condensed.totalChunks, + totalTokenCount: condensed.totalTokenCount, + }; + filesUpserted += 1; + yield upsertInput; } + } - await upsertFileNodeFromCondensed(input.scope, folderPath, condensed); - filesUpserted += 1; + if (typeof filesGraph.bulkUpsertFiles === "function") { + await filesGraph.bulkUpsertFiles(input.scope.knowledgeId, yieldFiles()); + } else { + for await (const f of yieldFiles()) { + await filesGraph.upsertFileNode(f); + } } logger.info( @@ -134,27 +157,6 @@ export async function storePullAnalysis(input: StorePullInput): Promise { - await filesGraph.upsertFileNode({ - orgId: scope.orgId, - knowledgeId: scope.knowledgeId, - repoId: scope.repoId, - relativePath: file.relativePath, - folderPath, - language: file.language.length > 0 ? file.language : languageFromPath(file.relativePath), - sha: file.sha256, - sizeBytes: file.sizeBytes, - analysis: file.analysis, - isBigFile: file.isBigFile, - totalChunks: file.totalChunks, - totalTokenCount: file.totalTokenCount, - }); -} - function shapeFolderPayload(folder: FolderSummary): { purpose: string; summary: string; diff --git a/packages/ladybug/package.json b/packages/ladybug/package.json index 9a5fe5e..f993e80 100644 --- a/packages/ladybug/package.json +++ b/packages/ladybug/package.json @@ -14,9 +14,13 @@ "dependencies": { "@bb/config": "workspace:*", "@bb/errors": "workspace:*", - "@bb/graph-db": "workspace:*", "@bb/graph-core": "workspace:*", + "@bb/graph-db": "workspace:*", "@bb/types": "workspace:*", - "@ladybugdb/core": "^0.16.1" + "@ladybugdb/core": "^0.16.1", + "parquetjs": "^0.11.2" + }, + "devDependencies": { + "@types/parquetjs": "^0.10.6" } } diff --git a/packages/ladybug/src/fileSchemas.ts b/packages/ladybug/src/fileSchemas.ts new file mode 100644 index 0000000..943ad13 --- /dev/null +++ 
b/packages/ladybug/src/fileSchemas.ts @@ -0,0 +1,51 @@ +import { ParquetSchema } from "parquetjs"; +import type { FileAnalysis } from "@bb/types"; + +export const fileParquetSchema = new ParquetSchema({ + id: { type: "UTF8" }, + orgId: { type: "UTF8" }, + knowledgeId: { type: "UTF8" }, + repoId: { type: "UTF8" }, + relativePath: { type: "UTF8" }, + language: { type: "UTF8" }, + sha: { type: "UTF8" }, + sizeBytes: { type: "INT64" }, + purpose: { type: "UTF8" }, + summary: { type: "UTF8" }, + businessContext: { type: "UTF8" }, + dataFlowDirection: { type: "UTF8" }, + ontologyConcepts: { type: "UTF8", repeated: true }, + businessEntities: { type: "UTF8", repeated: true }, + systemCapabilities: { type: "UTF8", repeated: true }, + sideEffects: { type: "UTF8", repeated: true }, + configDependencies: { type: "UTF8", repeated: true }, + integrationSurface: { type: "UTF8", repeated: true }, + contractsProvided: { type: "UTF8", repeated: true }, + contractsConsumed: { type: "UTF8", repeated: true }, + sectionNames: { type: "UTF8", repeated: true }, + sectionDescriptions: { type: "UTF8", repeated: true }, + isBigFile: { type: "BOOLEAN" }, + totalChunks: { type: "INT64" }, + totalTokenCount: { type: "INT64" }, + updatedAt: { type: "UTF8" }, +}); + +export const relParquetSchema = new ParquetSchema({ + from: { type: "UTF8" }, + to: { type: "UTF8" }, +}); + +export interface UpsertFileNodeInput { + orgId?: string; + knowledgeId: string; + repoId?: string; + relativePath: string; + language: string; + sha: string; + sizeBytes: number; + analysis: FileAnalysis; + folderPath?: string; + isBigFile?: boolean; + totalChunks?: number; + totalTokenCount?: number; +} diff --git a/packages/ladybug/src/fileVersions.ts b/packages/ladybug/src/fileVersions.ts index 6e531df..5fd5ee2 100644 --- a/packages/ladybug/src/fileVersions.ts +++ b/packages/ladybug/src/fileVersions.ts @@ -8,31 +8,31 @@ import { _runCypher } from "./client.ts"; */ const SNAPSHOT_FILES_TO_VERSION = ` MATCH (f:File 
{knowledgeId: $knowledgeId}) -MERGE (fv:FileVersion { - id: $knowledgeId + "::" + f.relativePath + "::" + $commitHash +CREATE (fv:FileVersion { + id: $knowledgeId + "::" + f.relativePath + "::" + $commitHash, + knowledgeId: $knowledgeId, + relativePath: f.relativePath, + commitHash: $commitHash, + language: f.language, + sha: f.sha, + sizeBytes: f.sizeBytes, + purpose: f.purpose, + summary: f.summary, + businessContext: f.businessContext, + dataFlowDirection: f.dataFlowDirection, + ontologyConcepts: f.ontologyConcepts, + businessEntities: f.businessEntities, + systemCapabilities: f.systemCapabilities, + sideEffects: f.sideEffects, + configDependencies: f.configDependencies, + integrationSurface: f.integrationSurface, + contractsProvided: f.contractsProvided, + contractsConsumed: f.contractsConsumed, + sectionNames: f.sectionNames, + sectionDescriptions: f.sectionDescriptions, + snapshotAt: $snapshotAt }) -SET fv.knowledgeId = $knowledgeId, - fv.relativePath = f.relativePath, - fv.commitHash = $commitHash, - fv.language = f.language, - fv.sha = f.sha, - fv.sizeBytes = f.sizeBytes, - fv.purpose = f.purpose, - fv.summary = f.summary, - fv.businessContext = f.businessContext, - fv.dataFlowDirection = f.dataFlowDirection, - fv.ontologyConcepts = f.ontologyConcepts, - fv.businessEntities = f.businessEntities, - fv.systemCapabilities = f.systemCapabilities, - fv.sideEffects = f.sideEffects, - fv.configDependencies = f.configDependencies, - fv.integrationSurface = f.integrationSurface, - fv.contractsProvided = f.contractsProvided, - fv.contractsConsumed = f.contractsConsumed, - fv.sectionNames = f.sectionNames, - fv.sectionDescriptions = f.sectionDescriptions, - fv.snapshotAt = $snapshotAt -MERGE (f)-[:HAS_VERSION]->(fv) +CREATE (f)-[:HAS_VERSION]->(fv) `; export interface SnapshotFilesInput { diff --git a/packages/ladybug/src/files.ts b/packages/ladybug/src/files.ts index 685b621..215564a 100644 --- a/packages/ladybug/src/files.ts +++ b/packages/ladybug/src/files.ts @@ 
-1,193 +1,275 @@ -import type { FileAnalysis } from "@bb/types"; import { _runCypher } from "./client.ts"; +import { ParquetSchema, ParquetWriter } from "parquetjs"; +import { join } from "node:path"; +import { unlinkSync } from "node:fs"; +import { fileParquetSchema, relParquetSchema } from "./fileSchemas.ts"; +import type { UpsertFileNodeInput } from "./fileSchemas.ts"; -const UPSERT_FILE = ` -MERGE (f:File {id: $id}) -SET f.knowledgeId = $knowledgeId, - f.relativePath = $relativePath, - f.orgId = $orgId, - f.repoId = $repoId, - f.language = $language, - f.sha = $sha, - f.sizeBytes = $sizeBytes, - f.purpose = $purpose, - f.summary = $summary, - f.businessContext = $businessContext, - f.dataFlowDirection = $dataFlowDirection, - f.ontologyConcepts = $ontologyConcepts, - f.businessEntities = $businessEntities, - f.systemCapabilities = $systemCapabilities, - f.sideEffects = $sideEffects, - f.configDependencies = $configDependencies, - f.integrationSurface = $integrationSurface, - f.contractsProvided = $contractsProvided, - f.contractsConsumed = $contractsConsumed, - f.sectionNames = $sectionNames, - f.sectionDescriptions = $sectionDescriptions, - f.isBigFile = $isBigFile, - f.totalChunks = $totalChunks, - f.totalTokenCount = $totalTokenCount, - f.updatedAt = $updatedAt -WITH f -MATCH (k:Knowledge {knowledgeId: $knowledgeId}) -MERGE (k)-[:HAS_FILE]->(f) +const DELETE_FILES = ` +MATCH (f:File) +WHERE f.id IN $ids +DETACH DELETE f `; -const ATTACH_FILE_TO_FOLDER = ` -MATCH (f:File {id: $id}) -MATCH (folder:Folder {id: $folderId}) -MERGE (folder)-[:CONTAINS]->(f) -`; +export async function deleteFileNodes(knowledgeId: string, relativePaths: string[]): Promise { + if (relativePaths.length === 0) { + return; + } + const ids = relativePaths.map((p) => `${knowledgeId}::${p}`); + await _runCypher(DELETE_FILES, { ids }); +} -const CLEAR_KEYWORDS = ` -MATCH (f:File {id: $id})-[r:HAS_KEYWORD]->() -DELETE r -`; +export async function bulkUpsertFiles( + knowledgeId: string, + 
fileStream: AsyncIterable, +): Promise { + const timestamp = Date.now(); + const rand = Math.random().toString(36).substring(2, 9); + const tempPaths: string[] = []; -const CLEAR_CLASSES = ` -MATCH (f:File {id: $id})-[r:HAS_CLASS]->() -DELETE r -`; + const openWriter = async ( + prefix: string, + schema: ParquetSchema, + ): Promise<{ writer: ParquetWriter; path: string }> => { + const path = join(process.cwd(), `temp_${prefix}_${timestamp}_${rand}.parquet`); + tempPaths.push(path); + const writer = await ParquetWriter.openFile(schema, path); + return { writer, path }; + }; -const CLEAR_FUNCTIONS = ` -MATCH (f:File {id: $id})-[r:HAS_FUNCTION]->() -DELETE r -`; + // Generate paths and open all writers upfront + const fileWriterInfo = await openWriter("files", fileParquetSchema); + const hasFileRelWriterInfo = await openWriter("has_file_rel", relParquetSchema); + const containsRelWriterInfo = await openWriter("contains_rel", relParquetSchema); + const hasKeywordRelWriterInfo = await openWriter("keyword_rel", relParquetSchema); + const hasClassRelWriterInfo = await openWriter("class_rel", relParquetSchema); + const hasFunctionRelWriterInfo = await openWriter("function_rel", relParquetSchema); + const hasImportInternalRelWriterInfo = await openWriter("import_int_rel", relParquetSchema); + const hasImportExternalRelWriterInfo = await openWriter("import_ext_rel", relParquetSchema); -const CLEAR_IMPORTS_INTERNAL = ` -MATCH (f:File {id: $id})-[r:HAS_IMPORT_INTERNAL]->() -DELETE r -`; + // Initialize record counters to selectively run COPY queries + let fileCount = 0; + let hasFileCount = 0; + let containsCount = 0; + let keywordCount = 0; + let classCount = 0; + let functionCount = 0; + let importIntCount = 0; + let importExtCount = 0; -const CLEAR_IMPORTS_EXTERNAL = ` -MATCH (f:File {id: $id})-[r:HAS_IMPORT_EXTERNAL]->() -DELETE r -`; + try { + const allKeywords = new Set(); + const allClasses = new Set(); + const allFunctions = new Set(); + const allImportsInternal = new 
Set(); + const allImportsExternal = new Set(); -const ATTACH_KEYWORDS = ` -MATCH (f:File {id: $id}) -UNWIND $names AS name -MERGE (kw:Keyword {name: name}) -CREATE (f)-[:HAS_KEYWORD]->(kw) -`; + for await (const input of fileStream) { + const orgId = input.orgId ?? "local"; + const repoId = input.repoId ?? input.knowledgeId; + const id = `${input.knowledgeId}::${input.relativePath}`; -const ATTACH_CLASSES = ` -MATCH (f:File {id: $id}) -UNWIND $signatures AS signature -MERGE (c:Class {signature: signature}) -CREATE (f)-[:HAS_CLASS]->(c) -`; + // Collect entities for UNWIND MERGE + for (const kw of input.analysis.keywords) { + allKeywords.add(kw.toLowerCase()); + } + for (const c of input.analysis.classes) { + allClasses.add(c); + } + for (const f of input.analysis.functions) { + allFunctions.add(f); + } + for (const i of input.analysis.importsInternal) { + allImportsInternal.add(i); + } + for (const e of input.analysis.importsExternal) { + allImportsExternal.add(e); + } -const ATTACH_FUNCTIONS = ` -MATCH (f:File {id: $id}) -UNWIND $signatures AS signature -MERGE (fn:Function {signature: signature}) -CREATE (f)-[:HAS_FUNCTION]->(fn) -`; + // Write file node row + const sectionMap = input.analysis.sectionMap ?? []; + const fileRow = { + id, + orgId, + knowledgeId: input.knowledgeId, + repoId, + relativePath: input.relativePath, + language: input.language, + sha: input.sha, + sizeBytes: input.sizeBytes, + purpose: input.analysis.purpose, + summary: input.analysis.summary, + businessContext: input.analysis.businessContext, + dataFlowDirection: input.analysis.dataFlowDirection ?? "", + ontologyConcepts: input.analysis.ontologyConcepts ?? [], + businessEntities: input.analysis.businessEntities ?? [], + systemCapabilities: input.analysis.systemCapabilities ?? [], + sideEffects: input.analysis.sideEffects ?? [], + configDependencies: input.analysis.configDependencies ?? [], + integrationSurface: input.analysis.integrationSurface ?? 
[], + contractsProvided: input.analysis.contractsProvided ?? [], + contractsConsumed: input.analysis.contractsConsumed ?? [], + sectionNames: sectionMap.map((s) => s.name), + sectionDescriptions: sectionMap.map((s) => s.description), + isBigFile: input.isBigFile ?? false, + totalChunks: input.totalChunks ?? 0, + totalTokenCount: input.totalTokenCount ?? 0, + updatedAt: new Date().toISOString(), + }; + await fileWriterInfo.writer.appendRow(fileRow); + fileCount++; -const ATTACH_IMPORTS_INTERNAL = ` -MATCH (f:File {id: $id}) -UNWIND $names AS name -MERGE (m:Module {name: name}) -CREATE (f)-[:HAS_IMPORT_INTERNAL]->(m) -`; + // HAS_FILE link row + await hasFileRelWriterInfo.writer.appendRow({ from: input.knowledgeId, to: id }); + hasFileCount++; -const ATTACH_IMPORTS_EXTERNAL = ` -MATCH (f:File {id: $id}) -UNWIND $names AS name -MERGE (m:Module {name: name}) -CREATE (f)-[:HAS_IMPORT_EXTERNAL]->(m) -`; + // CONTAINS link row (Folder) + if (input.folderPath !== undefined) { + const folderId = `${orgId}::${input.knowledgeId}::${repoId}::${input.folderPath}`; + await containsRelWriterInfo.writer.appendRow({ from: folderId, to: id }); + containsCount++; + } -export interface UpsertFileNodeInput { - orgId?: string; - knowledgeId: string; - repoId?: string; - relativePath: string; - language: string; - sha: string; - sizeBytes: number; - analysis: FileAnalysis; - folderPath?: string; - isBigFile?: boolean; - totalChunks?: number; - totalTokenCount?: number; -} + // HAS_KEYWORD rows + if (input.analysis.keywords.length > 0) { + for (const kw of input.analysis.keywords) { + await hasKeywordRelWriterInfo.writer.appendRow({ from: id, to: kw.toLowerCase() }); + keywordCount++; + } + } -const DELETE_FILES = ` -MATCH (f:File {knowledgeId: $knowledgeId}) -WHERE f.relativePath IN $relativePaths -DETACH DELETE f -`; + // HAS_CLASS rows + if (input.analysis.classes.length > 0) { + for (const c of input.analysis.classes) { + await hasClassRelWriterInfo.writer.appendRow({ from: id, to: c 
}); + classCount++; + } + } -export async function deleteFileNodes(knowledgeId: string, relativePaths: string[]): Promise { - if (relativePaths.length === 0) { - return; - } - await _runCypher(DELETE_FILES, { knowledgeId, relativePaths }); -} + // HAS_FUNCTION rows + if (input.analysis.functions.length > 0) { + for (const f of input.analysis.functions) { + await hasFunctionRelWriterInfo.writer.appendRow({ from: id, to: f }); + functionCount++; + } + } -export async function upsertFileNode(input: UpsertFileNodeInput): Promise { - const orgId = input.orgId ?? "local"; - const repoId = input.repoId ?? input.knowledgeId; - const id = `${input.knowledgeId}::${input.relativePath}`; - - const params = { id, knowledgeId: input.knowledgeId }; - const sectionMap = input.analysis.sectionMap ?? []; - - await _runCypher(UPSERT_FILE, { - id, - knowledgeId: input.knowledgeId, - relativePath: input.relativePath, - orgId, - repoId, - language: input.language, - sha: input.sha, - sizeBytes: input.sizeBytes, - purpose: input.analysis.purpose, - summary: input.analysis.summary, - businessContext: input.analysis.businessContext, - dataFlowDirection: input.analysis.dataFlowDirection ?? "", - ontologyConcepts: input.analysis.ontologyConcepts ?? [], - businessEntities: input.analysis.businessEntities ?? [], - systemCapabilities: input.analysis.systemCapabilities ?? [], - sideEffects: input.analysis.sideEffects ?? [], - configDependencies: input.analysis.configDependencies ?? [], - integrationSurface: input.analysis.integrationSurface ?? [], - contractsProvided: input.analysis.contractsProvided ?? [], - contractsConsumed: input.analysis.contractsConsumed ?? [], - sectionNames: sectionMap.map((s) => s.name), - sectionDescriptions: sectionMap.map((s) => s.description), - isBigFile: input.isBigFile ?? false, - totalChunks: input.totalChunks ?? 0, - totalTokenCount: input.totalTokenCount ?? 
0, - updatedAt: new Date().toISOString(), - }); - - if (input.folderPath !== undefined) { - const folderId = `${orgId}::${input.knowledgeId}::${repoId}::${input.folderPath}`; - await _runCypher(ATTACH_FILE_TO_FOLDER, { id, folderId }); - } + // HAS_IMPORT_INTERNAL rows + if (input.analysis.importsInternal.length > 0) { + for (const i of input.analysis.importsInternal) { + await hasImportInternalRelWriterInfo.writer.appendRow({ from: id, to: i }); + importIntCount++; + } + } - await _runCypher(CLEAR_KEYWORDS, params); - await _runCypher(CLEAR_CLASSES, params); - await _runCypher(CLEAR_FUNCTIONS, params); - await _runCypher(CLEAR_IMPORTS_INTERNAL, params); - await _runCypher(CLEAR_IMPORTS_EXTERNAL, params); + // HAS_IMPORT_EXTERNAL rows + if (input.analysis.importsExternal.length > 0) { + for (const e of input.analysis.importsExternal) { + await hasImportExternalRelWriterInfo.writer.appendRow({ from: id, to: e }); + importExtCount++; + } + } + } - if (input.analysis.keywords.length > 0) { - await _runCypher(ATTACH_KEYWORDS, { id, names: input.analysis.keywords.map((k) => k.toLowerCase()) }); - } - if (input.analysis.classes.length > 0) { - await _runCypher(ATTACH_CLASSES, { id, signatures: input.analysis.classes }); - } - if (input.analysis.functions.length > 0) { - await _runCypher(ATTACH_FUNCTIONS, { id, signatures: input.analysis.functions }); - } - if (input.analysis.importsInternal.length > 0) { - await _runCypher(ATTACH_IMPORTS_INTERNAL, { id, names: input.analysis.importsInternal }); + // Close all open writers + await fileWriterInfo.writer.close(); + await hasFileRelWriterInfo.writer.close(); + await containsRelWriterInfo.writer.close(); + await hasKeywordRelWriterInfo.writer.close(); + await hasClassRelWriterInfo.writer.close(); + await hasFunctionRelWriterInfo.writer.close(); + await hasImportInternalRelWriterInfo.writer.close(); + await hasImportExternalRelWriterInfo.writer.close(); + + // If no files were written, we are done + if (fileCount === 0) { + 
return; + } + + // A single Cypher query to clear out old data for this knowledgeId + // Clean slate deletion: MATCH (f:File {knowledgeId: $knowledgeId}) DETACH DELETE f + await _runCypher( + `MATCH (f:File {knowledgeId: $knowledgeId}) + DETACH DELETE f`, + { knowledgeId }, + ); + + // UNWIND MERGE queries for referenced nodes + if (allKeywords.size > 0) { + await _runCypher( + `UNWIND $names AS name + MERGE (kw:Keyword {name: name})`, + { names: Array.from(allKeywords) }, + ); + } + if (allClasses.size > 0) { + await _runCypher( + `UNWIND $signatures AS signature + MERGE (c:Class {signature: signature})`, + { signatures: Array.from(allClasses) }, + ); + } + if (allFunctions.size > 0) { + await _runCypher( + `UNWIND $signatures AS signature + MERGE (fn:Function {signature: signature})`, + { signatures: Array.from(allFunctions) }, + ); + } + if (allImportsInternal.size > 0) { + await _runCypher( + `UNWIND $names AS name + MERGE (m:Module {name: name})`, + { names: Array.from(allImportsInternal) }, + ); + } + if (allImportsExternal.size > 0) { + await _runCypher( + `UNWIND $names AS name + MERGE (m:Module {name: name})`, + { names: Array.from(allImportsExternal) }, + ); + } + + // Execute COPY FROM commands exactly once + if (fileCount > 0) { + await _runCypher(`COPY File FROM '${fileWriterInfo.path}'`); + } + if (hasFileCount > 0) { + await _runCypher(`COPY HAS_FILE FROM '${hasFileRelWriterInfo.path}'`); + } + if (containsCount > 0) { + await _runCypher(`COPY CONTAINS FROM '${containsRelWriterInfo.path}' (FROM='Folder', TO='File')`); + } + if (keywordCount > 0) { + await _runCypher(`COPY HAS_KEYWORD FROM '${hasKeywordRelWriterInfo.path}' (FROM='File', TO='Keyword')`); + } + if (classCount > 0) { + await _runCypher(`COPY HAS_CLASS FROM '${hasClassRelWriterInfo.path}'`); + } + if (functionCount > 0) { + await _runCypher(`COPY HAS_FUNCTION FROM '${hasFunctionRelWriterInfo.path}'`); + } + if (importIntCount > 0) { + await _runCypher(`COPY HAS_IMPORT_INTERNAL FROM 
'${hasImportInternalRelWriterInfo.path}'`); + } + if (importExtCount > 0) { + await _runCypher(`COPY HAS_IMPORT_EXTERNAL FROM '${hasImportExternalRelWriterInfo.path}'`); + } + } finally { + for (const p of tempPaths) { + try { + unlinkSync(p); + } catch { + // ignore + } + } } - if (input.analysis.importsExternal.length > 0) { - await _runCypher(ATTACH_IMPORTS_EXTERNAL, { id, names: input.analysis.importsExternal }); +} + +export async function upsertFileNode(input: UpsertFileNodeInput): Promise { + async function* single() { + yield input; } + await bulkUpsertFiles(input.knowledgeId, single()); } diff --git a/packages/ladybug/src/index.ts b/packages/ladybug/src/index.ts index 4014145..ac157b3 100644 --- a/packages/ladybug/src/index.ts +++ b/packages/ladybug/src/index.ts @@ -12,10 +12,11 @@ export { setKnowledgeStateInGraph, setKnowledgeBranchInGraph, deleteKnowledgeGraph, + vacuumOrphanEntities, } from "./knowledge.ts"; -export { upsertFileNode, deleteFileNodes } from "./files.ts"; -export type { UpsertFileNodeInput } from "./files.ts"; +export { upsertFileNode, deleteFileNodes, bulkUpsertFiles } from "./files.ts"; +export type { UpsertFileNodeInput } from "./fileSchemas.ts"; export { upsertRepoNode } from "./repo.ts"; export type { NodeScope, RepoSummaryPayload, UpsertRepoNodeInput } from "./repo.ts"; diff --git a/packages/ladybug/src/knowledge.ts b/packages/ladybug/src/knowledge.ts index 2eddd2c..bbbcbbc 100644 --- a/packages/ladybug/src/knowledge.ts +++ b/packages/ladybug/src/knowledge.ts @@ -113,6 +113,9 @@ export async function deleteKnowledgeGraph(knowledgeId: string): Promise { await _runCypher(DELETE_FOLDERS_BY_KNOWLEDGE, { knowledgeId }); await _runCypher(DELETE_ORPHAN_FILES); await _runCypher(DELETE_KNOWLEDGE_NODE, { knowledgeId }); +} + +export async function vacuumOrphanEntities(): Promise { await _runCypher(DELETE_ORPHAN_KEYWORDS); await _runCypher(DELETE_ORPHAN_CLASSES); await _runCypher(DELETE_ORPHAN_FUNCTIONS); diff --git 
a/packages/ladybug/src/provider.ts b/packages/ladybug/src/provider.ts index 8dbd7f8..ef64e7c 100644 --- a/packages/ladybug/src/provider.ts +++ b/packages/ladybug/src/provider.ts @@ -23,6 +23,7 @@ class LadybugGraphProvider implements IGraphDatabaseProvider { upsertFileNode: filesRepo.upsertFileNode, deleteFileNodes: filesRepo.deleteFileNodes, snapshotFilesToVersion: fileVersionsRepo.snapshotFilesToVersion, + bulkUpsertFiles: filesRepo.bulkUpsertFiles, }; folders = { @@ -56,3 +57,5 @@ class LadybugGraphProvider implements IGraphDatabaseProvider { } registerGraphProvider("ladybug", () => new LadybugGraphProvider()); + +export { vacuumOrphanEntities } from "./knowledge.ts";