Skip to content

Commit 21cbfaf

Browse files
authored
Add git indexing: orchestrator, worker, chunking, and embeddings (#2)
1 parent 0d66e0a commit 21cbfaf

44 files changed

Lines changed: 3956 additions & 14 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/ci.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: CI
2+
3+
on:
4+
pull_request:
5+
branches:
6+
- main
7+
push:
8+
branches:
9+
- main
10+
11+
jobs:
12+
build-and-test:
13+
name: Build and Test
14+
runs-on: ubuntu-latest
15+
16+
steps:
17+
- name: Checkout
18+
uses: actions/checkout@v4
19+
20+
- name: Set up Go
21+
uses: actions/setup-go@v5
22+
with:
23+
go-version-file: go.mod
24+
cache: true
25+
26+
- name: Download dependencies
27+
run: go mod download
28+
29+
- name: Build
30+
run: go build ./...
31+
32+
- name: Test
33+
run: go test ./...

cmd/api/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package main
33
import (
44
"github.com/gomantics/semantix/internal/api"
55
"github.com/gomantics/semantix/internal/db"
6+
"github.com/gomantics/semantix/internal/domains/indexing"
7+
"github.com/gomantics/semantix/internal/libs/openai"
68
"github.com/gomantics/semantix/internal/qdrant"
79
"github.com/gomantics/semantix/pkg/logger"
810
"go.uber.org/fx"
@@ -21,7 +23,9 @@ func main() {
2123
fx.Invoke(
2224
db.Init,
2325
qdrant.Init,
26+
openai.Init,
2427
api.Run,
28+
indexing.Run,
2529
),
2630
fx.WithLogger(func(l *zap.Logger) fxevent.Logger {
2731
return &fxevent.ZapLogger{

cmd/dev/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package main
33
import (
44
"github.com/gomantics/semantix/internal/api"
55
"github.com/gomantics/semantix/internal/db"
6+
"github.com/gomantics/semantix/internal/domains/indexing"
7+
"github.com/gomantics/semantix/internal/libs/openai"
68
"github.com/gomantics/semantix/internal/qdrant"
79
"github.com/gomantics/semantix/pkg/logger"
810
"go.uber.org/fx"
@@ -21,7 +23,9 @@ func main() {
2123
fx.Invoke(
2224
db.Init,
2325
qdrant.Init,
26+
openai.Init,
2427
api.Run,
28+
indexing.Run,
2529
),
2630
fx.WithLogger(func(l *zap.Logger) fxevent.Logger {
2731
return &fxevent.ZapLogger{
Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,19 @@ Support the major git hosting providers.
7373

7474
### 2.3 Tree-sitter Chunking
7575

76-
AST-aware code chunking using chunkx or similar.
76+
AST-aware code chunking using [`github.com/gomantics/chunkx`](https://github.com/gomantics/chunkx) - our own Go library implementing the CAST algorithm.
7777

78-
- [ ] **Language detection** from file extension and content
78+
- [ ] **Language detection** - based on the file extension, using chunkx's `languages.*` constants
7979

8080
- [ ] **Chunking strategy**:
8181
- Functions/methods as primary chunks
8282
- Classes/structs with their methods
8383
- Large functions split at logical boundaries
84-
- Target: ~500 tokens per chunk
84+
- Target: ~500 tokens per chunk via `chunkx.WithMaxSize(500)`
8585

86-
- [ ] **Chunk metadata**:
86+
- [ ] **Chunk metadata** - map chunkx output to our internal type:
8787
```go
88+
// chunkx returns []chunkx.Chunk; map to:
8889
type Chunk struct {
8990
Content string
9091
FilePath string
@@ -96,14 +97,27 @@ AST-aware code chunking using chunkx or similar.
9697
}
9798
```
9899

100+
- [ ] **Quick example**:
101+
```go
102+
import (
103+
"github.com/gomantics/chunkx"
104+
"github.com/gomantics/chunkx/languages"
105+
)
106+
107+
chunker := chunkx.NewChunker()
108+
chunks, err := chunker.Chunk(code,
109+
chunkx.WithLanguage(languages.Go),
110+
chunkx.WithMaxSize(500))
111+
```
112+
99113
- [ ] **Language support** (priority order):
100114
- Go, Python, JavaScript/TypeScript
101115
- Java, Rust, C/C++
102116
- Ruby, PHP
103117
- Markdown, YAML, JSON (as text)
104118

105119
**Files to create/modify:**
106-
- `libs/chunking/chunker.go`
120+
- `libs/chunking/chunker.go` - thin wrapper around chunkx
107121
- `domains/chunking/chunker.go` - higher-level orchestration
108122

109123
---
@@ -327,7 +341,7 @@ POST /v1/workspaces/:wid/repos (status = pending)
327341
## Dependencies
328342

329343
- `github.com/go-git/go-git/v5` - Git operations
330-
- Tree-sitter Go bindings or `chunkx` CLI
344+
- `github.com/gomantics/chunkx` - AST-based code chunking (CAST algorithm, 30+ languages)
331345
- `github.com/sashabaranov/go-openai` - OpenAI client
332346
- `github.com/qdrant/go-client` - Qdrant client
333347

go.mod

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,18 @@ tool (
99
)
1010

1111
require (
12+
github.com/approvals/go-approval-tests v1.5.0
13+
github.com/go-git/go-git/v5 v5.17.0
14+
github.com/gomantics/chunkx v0.0.3
1215
github.com/jackc/pgx/v5 v5.8.0
1316
github.com/labstack/echo/v4 v4.13.4
1417
github.com/pressly/goose/v3 v3.27.0
1518
github.com/qdrant/go-client v1.16.2
19+
github.com/sashabaranov/go-openai v1.41.2
20+
github.com/stretchr/testify v1.11.1
21+
github.com/testcontainers/testcontainers-go v0.40.0
22+
github.com/testcontainers/testcontainers-go/modules/postgres v0.40.0
23+
github.com/testcontainers/testcontainers-go/modules/qdrant v0.40.0
1624
go.uber.org/fx v1.24.0
1725
go.uber.org/zap v1.27.1
1826
google.golang.org/grpc v1.79.1
@@ -22,47 +30,97 @@ require (
2230
cel.dev/expr v0.25.1 // indirect
2331
dario.cat/mergo v1.0.2 // indirect
2432
filippo.io/edwards25519 v1.2.0 // indirect
33+
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect
2534
github.com/BurntSushi/toml v1.5.0 // indirect
35+
github.com/Microsoft/go-winio v0.6.2 // indirect
36+
github.com/ProtonMail/go-crypto v1.1.6 // indirect
2637
github.com/air-verse/air v1.64.5 // indirect
2738
github.com/andybalholm/brotli v1.2.0 // indirect
2839
github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
2940
github.com/bep/godartsass/v2 v2.5.0 // indirect
3041
github.com/bep/golibsass v1.2.0 // indirect
42+
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
43+
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
44+
github.com/cespare/xxhash/v2 v2.3.0 // indirect
45+
github.com/cloudflare/circl v1.6.1 // indirect
46+
github.com/containerd/errdefs v1.0.0 // indirect
47+
github.com/containerd/errdefs/pkg v0.3.0 // indirect
48+
github.com/containerd/log v0.1.0 // indirect
49+
github.com/containerd/platforms v0.2.1 // indirect
50+
github.com/cpuguy83/dockercfg v0.3.2 // indirect
3151
github.com/cubicdaiya/gonp v1.0.4 // indirect
52+
github.com/cyphar/filepath-securejoin v0.4.1 // indirect
3253
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
54+
github.com/distribution/reference v0.6.0 // indirect
55+
github.com/docker/docker v28.5.2+incompatible // indirect
56+
github.com/docker/go-connections v0.6.0 // indirect
57+
github.com/docker/go-units v0.5.0 // indirect
3358
github.com/dustin/go-humanize v1.0.1 // indirect
59+
github.com/ebitengine/purego v0.9.1 // indirect
60+
github.com/emirpasic/gods v1.18.1 // indirect
3461
github.com/fatih/color v1.18.0 // indirect
3562
github.com/fatih/structtag v1.2.0 // indirect
63+
github.com/felixge/httpsnoop v1.0.4 // indirect
3664
github.com/fsnotify/fsnotify v1.9.0 // indirect
65+
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
66+
github.com/go-git/go-billy/v5 v5.8.0 // indirect
67+
github.com/go-logr/logr v1.4.3 // indirect
68+
github.com/go-logr/stdr v1.2.2 // indirect
69+
github.com/go-ole/go-ole v1.3.0 // indirect
3770
github.com/go-sql-driver/mysql v1.9.3 // indirect
3871
github.com/gobwas/glob v0.2.3 // indirect
3972
github.com/gohugoio/hugo v0.149.1 // indirect
73+
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
4074
github.com/gomantics/cfgx v0.0.7 // indirect
4175
github.com/gomantics/sx v0.0.3 // indirect
4276
github.com/google/cel-go v0.26.1 // indirect
4377
github.com/google/uuid v1.6.0 // indirect
78+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
4479
github.com/inconshreveable/mousetrap v1.1.0 // indirect
4580
github.com/jackc/pgpassfile v1.0.0 // indirect
4681
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
4782
github.com/jackc/puddle/v2 v2.2.2 // indirect
83+
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
4884
github.com/jinzhu/inflection v1.0.0 // indirect
4985
github.com/joho/godotenv v1.5.1 // indirect
86+
github.com/kevinburke/ssh_config v1.2.0 // indirect
87+
github.com/klauspost/compress v1.18.4 // indirect
5088
github.com/labstack/gommon v0.4.2 // indirect
89+
github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect
90+
github.com/magiconair/properties v1.8.10 // indirect
5191
github.com/mattn/go-colorable v0.1.14 // indirect
5292
github.com/mattn/go-isatty v0.0.20 // indirect
5393
github.com/mfridman/interpolate v0.0.2 // indirect
94+
github.com/moby/docker-image-spec v1.3.1 // indirect
95+
github.com/moby/go-archive v0.1.0 // indirect
96+
github.com/moby/patternmatcher v0.6.0 // indirect
97+
github.com/moby/sys/sequential v0.6.0 // indirect
98+
github.com/moby/sys/user v0.4.0 // indirect
99+
github.com/moby/sys/userns v0.1.0 // indirect
100+
github.com/moby/term v0.5.2 // indirect
101+
github.com/morikuni/aec v1.0.0 // indirect
54102
github.com/ncruces/go-strftime v1.0.0 // indirect
103+
github.com/opencontainers/go-digest v1.0.0 // indirect
104+
github.com/opencontainers/image-spec v1.1.1 // indirect
55105
github.com/pelletier/go-toml v1.9.5 // indirect
56106
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
57107
github.com/pganalyze/pg_query_go/v6 v6.1.0 // indirect
58108
github.com/pingcap/errors v0.11.5-0.20240311024730-e056997136bb // indirect
59109
github.com/pingcap/failpoint v0.0.0-20240528011301-b51a646c7c86 // indirect
60110
github.com/pingcap/log v1.1.1-0.20221015072633-39906604fb81 // indirect
61111
github.com/pingcap/tidb/pkg/parser v0.0.0-20250324122243-d51e00e5bbf0 // indirect
112+
github.com/pjbgf/sha1cd v0.3.2 // indirect
113+
github.com/pkg/errors v0.9.1 // indirect
62114
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
115+
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
63116
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
64117
github.com/riza-io/grpc-go v0.2.0 // indirect
118+
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
65119
github.com/sethvargo/go-retry v0.3.0 // indirect
120+
github.com/shirou/gopsutil/v4 v4.25.10 // indirect
121+
github.com/sirupsen/logrus v1.9.3 // indirect
122+
github.com/skeema/knownhosts v1.3.1 // indirect
123+
github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 // indirect
66124
github.com/spf13/afero v1.14.0 // indirect
67125
github.com/spf13/cast v1.9.2 // indirect
68126
github.com/spf13/cobra v1.10.1 // indirect
@@ -71,10 +129,19 @@ require (
71129
github.com/stoewer/go-strcase v1.2.0 // indirect
72130
github.com/tdewolff/parse/v2 v2.8.3 // indirect
73131
github.com/tetratelabs/wazero v1.9.0 // indirect
132+
github.com/tklauser/go-sysconf v0.3.16 // indirect
133+
github.com/tklauser/numcpus v0.11.0 // indirect
74134
github.com/valyala/bytebufferpool v1.0.0 // indirect
75135
github.com/valyala/fasttemplate v1.2.2 // indirect
76136
github.com/wasilibs/go-pgquery v0.0.0-20250409022910-10ac41983c07 // indirect
77137
github.com/wasilibs/wazero-helpers v0.0.0-20240620070341-3dff1577cd52 // indirect
138+
github.com/xanzy/ssh-agent v0.3.3 // indirect
139+
github.com/yusufpapurcu/wmi v1.2.4 // indirect
140+
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
141+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect
142+
go.opentelemetry.io/otel v1.40.0 // indirect
143+
go.opentelemetry.io/otel/metric v1.40.0 // indirect
144+
go.opentelemetry.io/otel/trace v1.40.0 // indirect
78145
go.uber.org/atomic v1.11.0 // indirect
79146
go.uber.org/dig v1.19.0 // indirect
80147
go.uber.org/multierr v1.11.0 // indirect
@@ -85,10 +152,11 @@ require (
85152
golang.org/x/sys v0.41.0 // indirect
86153
golang.org/x/text v0.34.0 // indirect
87154
golang.org/x/time v0.14.0 // indirect
88-
google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect
155+
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
89156
google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d // indirect
90157
google.golang.org/protobuf v1.36.11 // indirect
91158
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
159+
gopkg.in/warnings.v0 v0.1.2 // indirect
92160
gopkg.in/yaml.v3 v3.0.1 // indirect
93161
modernc.org/libc v1.68.0 // indirect
94162
modernc.org/mathutil v1.7.1 // indirect

0 commit comments

Comments
 (0)