From a860d5e4c7a7969d66aece2d3dd6e47eab89a1ff Mon Sep 17 00:00:00 2001 From: Martin Catty Date: Sat, 23 May 2026 16:25:13 -0400 Subject: [PATCH] feat: align GitLab probe with probe-core and HTTP control plane Compose core.Probe with entities (users, groups, projects, repositories, code_files), HTTP-only control plane, and go-git repository sync (no git CLI in the distroless image). Add CI/release workflows, English schema, gofmt pre-commit hook, and AGENTS.md aligned with probe-confluence. --- .githooks/pre-commit | 22 ++ .github/dependabot.yml | 44 ++++ .github/workflows/ci.yml | 20 ++ .github/workflows/codeql.yml | 53 +++++ .github/workflows/release-on-semver-tag.yml | 20 ++ .gitignore | 65 ++++++ .gitmodules | 6 + AGENTS.md | 125 ++++++++++ CHANGELOG.md | 26 +++ Dockerfile | 32 +++ Makefile | 123 ++++++++++ README.md | 50 +++- cmd/main.go | 49 ++++ cmd/version.go | 3 + config/probe.example.yml | 47 ++++ config/schema.yml | 181 +++++++++++++++ env.secrets.example | 8 + go.mod | 35 +++ go.sum | 108 +++++++++ internal/config/config.go | 215 ++++++++++++++++++ internal/config/join_blob.go | 28 +++ internal/config/join_blob_test.go | 12 + internal/config/source_view.go | 39 ++++ internal/config/source_view_test.go | 26 +++ internal/gitlab/client.go | 215 ++++++++++++++++++ internal/manager/manager.go | 36 +++ internal/models/code_file.go | 11 + internal/models/gitlab.go | 95 ++++++++ internal/probe/entities/code_files_entity.go | 88 +++++++ internal/probe/entities/groups_entity.go | 47 ++++ internal/probe/entities/helpers.go | 78 +++++++ internal/probe/entities/projects_entity.go | 47 ++++ .../probe/entities/repositories_entity.go | 60 +++++ internal/probe/entities/users_entity.go | 47 ++++ internal/probe/probe.go | 109 +++++++++ internal/repositories/codefiles.go | 136 +++++++++++ internal/repositories/sync.go | 161 +++++++++++++ scripts/install-git-hooks.sh | 9 + 38 files changed, 2474 insertions(+), 2 deletions(-) create mode 100755 .githooks/pre-commit create 
mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/release-on-semver-tag.yml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 AGENTS.md create mode 100644 CHANGELOG.md create mode 100644 Dockerfile create mode 100644 Makefile create mode 100644 cmd/main.go create mode 100644 cmd/version.go create mode 100644 config/probe.example.yml create mode 100644 config/schema.yml create mode 100644 env.secrets.example create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/config/config.go create mode 100644 internal/config/join_blob.go create mode 100644 internal/config/join_blob_test.go create mode 100644 internal/config/source_view.go create mode 100644 internal/config/source_view_test.go create mode 100644 internal/gitlab/client.go create mode 100644 internal/manager/manager.go create mode 100644 internal/models/code_file.go create mode 100644 internal/models/gitlab.go create mode 100644 internal/probe/entities/code_files_entity.go create mode 100644 internal/probe/entities/groups_entity.go create mode 100644 internal/probe/entities/helpers.go create mode 100644 internal/probe/entities/projects_entity.go create mode 100644 internal/probe/entities/repositories_entity.go create mode 100644 internal/probe/entities/users_entity.go create mode 100644 internal/probe/probe.go create mode 100644 internal/repositories/codefiles.go create mode 100644 internal/repositories/sync.go create mode 100755 scripts/install-git-hooks.sh diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..7042d0f --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,22 @@ +#!/usr/bin/env sh +# Same check as fluid-pub/actions go-workload-ci "Verify formatting". 
+set -e + +repo_root="$(git rev-parse --show-toplevel 2>/dev/null)" || { + echo "pre-commit: not inside a Git work tree" >&2 + exit 1 +} + +cd "$repo_root" || exit 1 + +if ! command -v gofmt >/dev/null 2>&1; then + echo "pre-commit: gofmt not found in PATH" >&2 + exit 1 +fi + +unformatted="$(gofmt -l .)" +if [ -n "$unformatted" ]; then + echo "pre-commit: gofmt would reformat these files (run: gofmt -w .):" >&2 + echo "$unformatted" >&2 + exit 1 +fi diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..b169abc --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,44 @@ +version: 2 +updates: + - package-ecosystem: gomod + directory: "/" + schedule: + interval: weekly + day: monday + target-branch: develop + open-pull-requests-limit: 5 + commit-message: + prefix: chore(deps) + labels: + - dependencies + - go + + - package-ecosystem: docker + directory: "/" + schedule: + interval: weekly + day: monday + target-branch: develop + open-pull-requests-limit: 5 + commit-message: + prefix: chore(deps) + labels: + - dependencies + - docker + + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: weekly + day: monday + target-branch: develop + open-pull-requests-limit: 5 + commit-message: + prefix: chore(deps) + labels: + - dependencies + - github-actions + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..1f6f566 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,20 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + - develop + +permissions: + contents: read + +jobs: + ci: + uses: fluid-pub/actions/.github/workflows/go-workload-ci.yml@v1 + with: + workload_kind: probe + core_repository: fluid-pub/probe-core + core_ref: develop + go_version: "1.25" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..0c82f4b --- /dev/null +++ 
b/.github/workflows/codeql.yml @@ -0,0 +1,53 @@ +name: CodeQL + +on: + push: + branches: + - develop + - main + pull_request: + branches: + - develop + - main + schedule: + - cron: "27 3 * * 1" + +permissions: + actions: read + contents: read + security-events: write + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Checkout shared core + uses: actions/checkout@v6 + with: + repository: fluid-pub/probe-core + ref: develop + path: core + + - name: Align go.mod replace for CI layout + run: | + go mod edit -replace fluid/probes/core=./core + go mod download + + - uses: actions/setup-go@v6 + with: + go-version: "1.25" + cache-dependency-path: go.sum + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: go + + - name: Autobuild + uses: github/codeql-action/autobuild@v4 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/release-on-semver-tag.yml b/.github/workflows/release-on-semver-tag.yml new file mode 100644 index 0000000..955837e --- /dev/null +++ b/.github/workflows/release-on-semver-tag.yml @@ -0,0 +1,20 @@ +name: Release on semver tag + +on: + push: + tags: + - "[0-9]+.[0-9]+.[0-9]+" + - "[0-9]+.[0-9]+.[0-9]+-*" + +permissions: + contents: write + packages: write + +jobs: + release: + uses: fluid-pub/actions/.github/workflows/go-workload-release.yml@v1 + with: + workload_kind: probe + binary_name: gitlab-probe + core_repository: fluid-pub/probe-core + core_ref: develop diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..76024e5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,65 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ + +# Go workspace file +go.work + +# Build 
directory +build/ +gitlab-probe +gitlab-agent + +# State directory +state/ + +# Data directory (repositories clone, etc.) +data/ + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +*.log + +# Environment variables +.env +.env.local +env.secrets + +# Local probe config (use probe.example.yml as template) +config/probe.yml + +# Coverage reports +coverage.out +coverage.html + +# Temporary files +*.tmp +*.temp + diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..9a5b3cb --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +# Required when this module is the root of fluid-pub/probe-gitlab on GitHub. +# Monorepo dev can use replace => ../core in go.mod without initializing this submodule. +[submodule "core"] + path = core + url = https://github.com/fluid-pub/probe-core.git + branch = develop diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..e9d1470 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,125 @@ +--- +name: gitlab-probe +description: Expert Go developer for the Fluid GitLab probe (inventory, repository sync, RAG code_files) +--- + +You are an expert Go developer working on the **Fluid GitLab probe** (`fluid-pub/probe-gitlab`): a **probe** (not an execution agent) that collects GitLab data and pushes entities to the Fluid control plane over HTTP (`/probes`). + +## Persona + +- Integrate with **GitLab API v4** and optional **local repository mirrors** for file indexing. +- Compose **`fluid/probes/core`** for lifecycle, state files, control plane HTTP, schema push, and `runtime_config` merge. +- Write idiomatic Go: explicit errors, English log messages, no customer-specific URLs or names in versioned files. + +## Tech stack + +- **Go 1.25+** (see `go.mod`; CI uses Go 1.25 for go-git dependencies). +- **`fluid/probes/core`** via submodule `core/` (or `replace ../core` in monorepo dev). 
+- **YAML v3** for config and local state snapshots. +- **go-git** (`github.com/go-git/go-git/v5`) for clone/fetch/checkout — **never** `exec.Command("git", ...)`. +- **Distroless** runtime image: static binary only, **no `git` binary** in the container. + +## Repository layout + +| Path | Role | +|------|------| +| `core/` | Submodule → `fluid-pub/probe-core` | +| `cmd/main.go` | Entrypoint, signals, optional `MergedConfigProvider` | +| `cmd/version.go` | Semver aligned with release tag | +| `internal/probe/probe.go` | Composes `*core.Probe`, registers entities | +| `internal/probe/entities/` | Per-entity `Refresh` / `Save` (users, groups, projects, repositories, code_files) | +| `internal/manager/` | Thin wrapper over `core/state.Manager` | +| `internal/gitlab/` | GitLab REST client | +| `internal/repositories/` | go-git sync + filesystem scan for `code_files` | +| `internal/config/` | YAML load, validation, `source_view` URL helpers | +| `internal/models/` | Entity structs | +| `config/probe.example.yml` | Generic template (versioned) | +| `config/schema.yml` | Entity contract (embedded in Docker image) | +| `config/probe.yml` | Local config (gitignored) | +| `env.secrets.example` | `GITLAB_TOKEN`, `FLUID_CONTROLPLANE_*` | +| `.github/workflows/` | CI / release via `fluid-pub/actions` | + +Gitignored at runtime: `state/`, `data/repositories/`, `env.secrets`. + +## Architecture + +1. **`main`** loads `config/probe.yml`, wraps config in `core.MergedConfigProvider` when control plane is configured. +2. **`probe.NewProbe`** builds GitLab client, `manager.NewManager`, `core.NewProbe`, registers entities listed in `data.entities`. +3. Each entity runs on its own **`refresh_interval`** (goroutine per entity in probe-core). +4. **`SaveEntity`** persists YAML under `state.dir` and enqueues ingest to the control plane when connected. +5. 
**`repositories` / `code_files`**: go-git shallow clone under `data.repositories.base_dir`, then walk files (respect `index_files`, RAG on `fields.content.rag`). + +Entities: `users`, `groups`, `projects`, `repositories`, `code_files` — see `config/schema.yml`. + +## Configuration (current shape) + +```yaml +probe: + name: gitlab + version: "0.1.0" +gitlab: + url: "https://gitlab.example.com" + token: "${GITLAB_TOKEN}" + api_version: "v4" + group_id: "your-group-path" +data: + include_subgroups: true + entities: + - name: users + refresh_interval: "15m" + # ... groups, projects, repositories, code_files (fields.content.rag for RAG) + repositories: + base_dir: "data/repositories" + repos: [] +state: + dir: state + format: yaml + cleanup_interval: 60 +controlplane: + base_url: "${FLUID_CONTROLPLANE_HTTP_BASE}" + api_version: "v1" + parameters: + organization_uuid: "${FLUID_CONTROLPLANE_ORGANIZATION_UUID}" + token: "${FLUID_CONTROLPLANE_TOKEN}" +``` + +Operational tuning (entities, intervals, repo list) can also come from control plane **`runtime_config`**. + +## Commands + +```bash +./scripts/install-git-hooks.sh +git submodule update --init --recursive +cp config/probe.example.yml config/probe.yml +cp env.secrets.example env.secrets +source env.secrets +make dev # go run ./cmd +make test +make fmt +``` + +Release: semver tag without `v`, matching `cmd/version.go` → `ghcr.io/fluid-pub/probe-gitlab:<version>`. + +## Standards + +**Naming:** packages lowercase (`probe`, `config`, `gitlab`); exported types PascalCase; YAML field names snake_case per schema. + +**Errors:** wrap with `fmt.Errorf("...: %w", err)`; never ignore errors without reason. + +**Logging:** English messages via `log` / `log.Printf`. + +**Git operations:** only **go-git** in `internal/repositories/sync.go`; auth via `plumbing/transport/http.BasicAuth` (`oauth2` + PAT for GitLab HTTPS). + +**Secrets:** never commit tokens; use `env.secrets` and `${VAR}` in YAML. 
+ +## Boundaries + +- **Always:** extend behavior via new or updated **entities** and probe-core hooks; keep generic examples in `probe.example.yml`; update `CHANGELOG.md` for notable changes. +- **Ask first:** new `go.mod` dependencies; schema-breaking field renames; changing control plane ingest shape. +- **Never:** `exec.Command("git", ...)`; customer-specific paths in versioned files; WebSocket control plane transport; commit `state/`, `data/`, or `probe.yml`. + +## References + +- Human-oriented: [README.md](README.md) +- Workspace rule (monorepo): `probes-no-git-cli-docker.mdc` — no git CLI in probe images +- Sibling reference: `fluid-pub/probe-confluence` (same probe-core + HTTP pattern) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d2cfc40 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,26 @@ +# Changelog — probe-gitlab + +All notable changes to **fluid-pub/probe-gitlab** are documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +Tag naming: `0.y.z` (no `v` prefix). Align `cmd/version.go` with the tag before release. + +## [Unreleased] + +### Changed + +- Repository sync uses **go-git** instead of the `git` CLI (no `git` binary required in the distroless image). + +## [0.1.0] - 2026-05-23 + +### Added + +- **probe-core** integration: entity-based lifecycle (`users`, `groups`, `projects`, `repositories`, `code_files`), HTTP control plane (`/probes`), `runtime_config` merge, schema in image. +- CI/CD via `fluid-pub/actions` (test, release on semver tag, GHCR image `ghcr.io/fluid-pub/probe-gitlab`). +- Git repository sync and `code_files` indexing for RAG. + +### Changed + +- Configuration uses `state` (replacing `output`) and per-entity `refresh_interval` (replacing global probe refresh). 
diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..08ee8bd --- /dev/null +++ b/Dockerfile @@ -0,0 +1,32 @@ +# syntax=docker/dockerfile:1 +# Fluid GitLab probe — see code/actions/templates/Dockerfile.go-workload + +FROM golang:1.26-bookworm AS build + +ARG BINARY_NAME=gitlab-probe +ARG VERSION=0.0.0 +ARG TARGETOS=linux +ARG TARGETARCH=amd64 + +WORKDIR /src + +COPY go.mod go.sum ./ +COPY core ./core +COPY cmd ./cmd +COPY internal ./internal + +RUN go mod download + +RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \ + go build -ldflags "-s -w -X main.Version=${VERSION}" \ + -o /out/workload ./cmd + +FROM gcr.io/distroless/static-debian12:nonroot + +COPY --from=build /out/workload /usr/local/bin/workload +COPY config/schema.yml /etc/fluid/config/schema.yml + +USER nonroot:nonroot + +ENTRYPOINT ["/usr/local/bin/workload"] +CMD ["-config", "/etc/fluid/config/config.yaml"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a7945fe --- /dev/null +++ b/Makefile @@ -0,0 +1,123 @@ +# Makefile for Fluid GitLab Probe + +# Variables +BINARY_NAME=gitlab-probe +BUILD_DIR=build +CONFIG_DIR=config +STATE_DIR=state + +GO=go +GOOS?=$(shell go env GOOS) +GOARCH?=$(shell go env GOARCH) + +VERSION?=0.1.0 +BUILD_TIME=$(shell date -u '+%Y-%m-%d_%H:%M:%S') +GIT_COMMIT=$(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") + +LDFLAGS=-ldflags "-X main.Version=${VERSION} -X main.BuildTime=${BUILD_TIME} -X main.GitCommit=${GIT_COMMIT}" + +.PHONY: all build clean run test deps help + +all: clean build + +deps: + @echo "Installing dependencies..." + $(GO) mod download + $(GO) mod tidy + +build: deps + @echo "Building GitLab probe..." + @mkdir -p $(BUILD_DIR) + $(GO) build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME) cmd/main.go + @echo "Probe built in $(BUILD_DIR)/$(BINARY_NAME)" + +build-all: deps + @echo "Building for all platforms..." 
+ @mkdir -p $(BUILD_DIR) + + GOOS=linux GOARCH=amd64 $(GO) build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME)-linux-amd64 cmd/main.go + + GOOS=darwin GOARCH=amd64 $(GO) build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME)-darwin-amd64 cmd/main.go + GOOS=darwin GOARCH=arm64 $(GO) build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME)-darwin-arm64 cmd/main.go + + # Windows + GOOS=windows GOARCH=amd64 $(GO) build $(LDFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME)-windows-amd64.exe cmd/main.go + + @echo "Build completed for all platforms" + +clean: + @echo "Cleaning build files..." + @rm -rf $(BUILD_DIR) + @rm -rf $(STATE_DIR) + +# Run from the repository root so the default -config path (config/probe.yml) resolves +run: build + @echo "Starting GitLab probe..." + @./$(BUILD_DIR)/$(BINARY_NAME) + +run-config: build + @echo "Starting GitLab probe with custom configuration..." + @cd $(BUILD_DIR) && ./$(BINARY_NAME) -config ../$(CONFIG_DIR)/probe.yml + +dev: deps + @echo "Starting in development mode..." + $(GO) run ./cmd + +test: deps + @echo "Running tests..." + $(GO) test -v ./... + +# Check code with golangci-lint +lint: + @echo "Checking code with golangci-lint..." + @if command -v golangci-lint >/dev/null 2>&1; then \ + golangci-lint run; \ + else \ + echo "golangci-lint not installed. Installing..."; \ + go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest; \ + golangci-lint run; \ + fi + +# Format code +fmt: + @echo "Formatting code..." + $(GO) fmt ./... + +# Check vulnerabilities (gosec is published under the securego organization) +security: + @echo "Checking vulnerabilities..." + @if command -v gosec >/dev/null 2>&1; then \ + gosec ./...; \ + else \ + echo "gosec not installed. Installing..."; \ + go install github.com/securego/gosec/v2/cmd/gosec@latest; \ + gosec ./...; \ + fi + +# Create distribution package +package: build + @echo "Creating distribution package..." 
+ @mkdir -p $(BUILD_DIR)/package + @cp -r $(CONFIG_DIR) $(BUILD_DIR)/package/ + @cp $(BUILD_DIR)/$(BINARY_NAME) $(BUILD_DIR)/package/ + @cp README.md $(BUILD_DIR)/package/ + @cd $(BUILD_DIR) && tar -czf $(BINARY_NAME)-$(VERSION).tar.gz package/ + @echo "Package created: $(BUILD_DIR)/$(BINARY_NAME)-$(VERSION).tar.gz" + +# Show help +help: + @echo "Available commands:" + @echo " deps - Install dependencies" + @echo " build - Build the probe" + @echo " build-all - Build for all platforms" + @echo " clean - Clean build files" + @echo " run - Run the probe" + @echo " run-config - Run with custom configuration" + @echo " dev - Development mode" + @echo " test - Run tests" + @echo " test-structure- Test file structure and rotation" + @echo " test-coverage - Tests with coverage" + @echo " lint - Check code" + @echo " fmt - Format code" + @echo " security - Check vulnerabilities" + @echo " package - Create distribution package" + @echo " help - Show this help" diff --git a/README.md b/README.md index 22f1c99..206bea2 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,48 @@ -# probe-gitlab -Fluid GitLab probe +# fluid-pub/probe-gitlab + +Fluid probe for GitLab: collects **users**, **groups**, **projects**, cloned **repositories**, and **code_files** for RAG; pushes entities to the control plane over HTTP (`/probes`). 
+ +## Repository layout + +| Path | Role | +|------|------| +| `core/` | Git submodule → [`fluid-pub/probe-core`](https://github.com/fluid-pub/probe-core) | +| `cmd/` | Entrypoint and `cmd/version.go` (semver for releases) | +| `internal/` | GitLab API client, entities, repository sync | +| `config/probe.example.yml` | Configuration template | +| `config/schema.yml` | Entity schema (shipped in the Docker image; pushed to the control plane on connect) | +| `.github/workflows/` | CI and release via [`fluid-pub/actions`](https://github.com/fluid-pub/actions) | + +## Local development + +One-time per clone, enable the same **`gofmt`** check as CI: + +```bash +./scripts/install-git-hooks.sh +``` + +```bash +git submodule update --init --recursive +cp config/probe.example.yml config/probe.yml +cp env.secrets.example env.secrets +# Set GITLAB_TOKEN and control plane values in env.secrets (never commit that file). +source env.secrets +make dev +``` + +Credentials and GitLab URLs come from **your** `env.secrets` and local `config/probe.yml` only. Runtime snapshots and git clones use `state/` and `data/repositories/` (gitignored). + +## Changelog + +Release notes: [CHANGELOG.md](CHANGELOG.md). + +## Releases + +Push a semver tag **without** `v` (e.g. `0.1.0`) matching `var Version` in `cmd/version.go`. The release workflow publishes: + +- `ghcr.io/fluid-pub/probe-gitlab:<version>` +- GitHub Release asset `gitlab-probe-linux-amd64` and `SHA256SUMS.txt` + +## Control plane + +Enroll as a probe with `agent_type: gitlab`. Operational tuning (`data.entities`, `fields.*.rag`, `data.repositories.repos`, intervals) belongs in **runtime_config** on the probe record. The schema contract is **`config/schema.yml`** (image semver); Kubernetes/GitOps should mount only bootstrap YAML (see **`fluid-workload`** `config.schemaInImage`). 
diff --git a/cmd/main.go b/cmd/main.go new file mode 100644 index 0000000..ea6a56c --- /dev/null +++ b/cmd/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "flag" + "log" + "os" + "os/signal" + "syscall" + + "fluid/probes/core" + "fluid/probes/core/state" + "fluid/probes/gitlab/internal/config" + "fluid/probes/gitlab/internal/probe" +) +/* main is the probe entrypoint. It parses the -config flag (default config/probe.yml), loads the config, and, when cfg.GetControlplane() is non-nil, wraps it in core.NewMergedConfigProvider with the config's RAG field allowlist. It then builds and starts the GitLab probe and blocks until SIGINT or SIGTERM arrives before calling Stop. A load, init or start error ends the process through log.Fatalf. */ +func main() { + configPath := flag.String("config", "config/probe.yml", "path to probe YAML config") + flag.Parse() + + cfg, err := config.LoadConfig(*configPath) + if err != nil { + log.Fatalf("load config: %v", err) + } + + var probeConfig state.ConfigProvider = cfg + if cfg.GetControlplane() != nil { + merged := core.NewMergedConfigProvider(cfg) + merged.SetRAGFieldAllowlist(cfg.RAGFieldAllowlist) + probeConfig = merged + } + + gitlabProbe, err := probe.NewProbe(probeConfig) + if err != nil { + log.Fatalf("init probe: %v", err) + } + + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + + if err := gitlabProbe.Start(); err != nil { + log.Fatalf("start probe: %v", err) + } + + log.Println("GitLab probe running; Ctrl+C to stop") + + <-sigChan + gitlabProbe.Stop() + log.Println("GitLab probe stopped") +} diff --git a/cmd/version.go b/cmd/version.go new file mode 100644 index 0000000..3ba27b7 --- /dev/null +++ b/cmd/version.go @@ -0,0 +1,3 @@ +package main +/* Version is the probe version string. The Dockerfile and Makefile in this patch pass -X main.Version to the linker to set it at build time. */ +var Version = "0.1.0" diff --git a/config/probe.example.yml b/config/probe.example.yml new file mode 100644 index 0000000..6751bf3 --- /dev/null +++ b/config/probe.example.yml @@ -0,0 +1,47 @@ +# GitLab probe — copy to config/probe.yml and set env vars (see env.secrets.example). 
+probe: + name: "gitlab" + version: "0.1.0" + +gitlab: + url: "https://gitlab.example.com" + token: "${GITLAB_TOKEN}" + api_version: "v4" + installation_type: "saas" + group_id: "your-group-path" + +data: + include_subgroups: true + entities: + - name: users + refresh_interval: "15m" + - name: groups + refresh_interval: "15m" + - name: projects + refresh_interval: "15m" + - name: repositories + refresh_interval: "1h" + - name: code_files + refresh_interval: "1h" + fields: + content: + rag: true + repositories: + base_dir: "data/repositories" + source_view_base_url: "https://gitlab.example.com/{path}/-/blob/{ref}" + repos: + - url: "https://gitlab.example.com/group/project" + branch: "main" + index_files: true + +state: + dir: "state" + format: "yaml" + cleanup_interval: 60 + +controlplane: + base_url: "${FLUID_CONTROLPLANE_HTTP_BASE}" + api_version: "v1" + parameters: + organization_uuid: "${FLUID_CONTROLPLANE_ORGANIZATION_UUID}" + token: "${FLUID_CONTROLPLANE_TOKEN}" diff --git a/config/schema.yml b/config/schema.yml new file mode 100644 index 0000000..f146438 --- /dev/null +++ b/config/schema.yml @@ -0,0 +1,181 @@ +entities: + users: + description: "GitLab users" + fields: + id: + type: "integer" + description: "Unique user ID" + primary: true + username: + type: "string" + description: "Unique username" + email: + type: "string" + format: "email" + description: "User email address" + name: + type: "string" + max_length: 255 + description: "Full name" + state: + type: "string" + enum: ["active", "blocked"] + default: "active" + description: "Account state" + bio: + type: "string" + description: "User biography" + external: + type: "boolean" + default: false + description: "Indicates if user is external" + created_at: + type: "string" + format: "datetime" + description: "Account creation date" + last_sign_in_at: + type: "string" + format: "datetime" + description: "Last sign in date" + confirmed_at: + type: "string" + format: "datetime" + description: "Account 
confirmation date" + groups_names: + type: "array" + items: + type: "string" + description: "Names of groups the user belongs to" + computed: true + + groups: + description: "GitLab groups" + fields: + id: + type: "integer" + description: "Unique group ID" + primary: true + name: + type: "string" + description: "Group name" + path: + type: "string" + description: "Group path" + description: + type: "string" + description: "Group description" + visibility: + type: "string" + enum: ["private", "internal", "public"] + description: "Group visibility" + created_at: + type: "string" + format: "datetime" + description: "Group creation date" + parent_id: + type: "integer" + nullable: true + description: "Parent group ID" + + projects: + description: "GitLab projects" + fields: + id: + type: "integer" + description: "Unique project ID" + primary: true + name: + type: "string" + description: "Project name" + path: + type: "string" + description: "Project path" + description: + type: "string" + description: "Project description" + visibility: + type: "string" + enum: ["private", "internal", "public"] + description: "Project visibility" + created_at: + type: "string" + format: "datetime" + description: "Project creation date" + namespace: + type: "object" + description: "Project namespace (group or user)" + properties: + id: + type: "integer" + description: "Namespace ID" + name: + type: "string" + description: "Namespace name" + path: + type: "string" + description: "Namespace path" + kind: + type: "string" + description: "Namespace type (group or user)" + full_path: + type: "string" + description: "Full namespace path" + parent_id: + type: "integer" + nullable: true + description: "Parent namespace ID" + default_branch: + type: "string" + description: "Default project branch" + + repositories: + description: "Cloned Git repositories (synced locally)" + fields: + url: + type: "string" + description: "Repository URL (unique)" + primary: true + branch: + type: "string" + 
description: "Tracked branch" + path: + type: "string" + description: "Local directory name" + name: + type: "string" + description: "Repository slug/name" + last_sync_at: + type: "string" + format: "datetime" + description: "Last sync timestamp" + source_view_base_url: + type: "string" + description: "Resolved GitLab blob URL prefix for view-source links (file path not included)" + rag: + type: "object" + description: "Path-based RAG rules (glob patterns, facets); configure per repo under data.repositories.repos[].rag in probe.yml" + + code_files: + description: "Text files indexed for RAG (scanned from cloned repositories)" + fields: + id: + type: "string" + description: "Stable identifier (repository hash + file path)" + primary: true + repo_url: + type: "string" + description: "Git repository URL" + file_path: + type: "string" + description: "Path relative to the repository root" + content: + type: "string" + description: "Text content (RAG when fields.content.rag is enabled on the code_files entity)" + usable_in_rag: true + title: + type: "string" + description: "Display title (usually the file name)" + source_url: + type: "string" + nullable: true + description: "Absolute GitLab blob URL for the file (set by the probe at scan time)" diff --git a/env.secrets.example b/env.secrets.example new file mode 100644 index 0000000..219dc3a --- /dev/null +++ b/env.secrets.example @@ -0,0 +1,8 @@ +# Copy to env.secrets (gitignored). Never commit env.secrets. 
+# Usage: source env.secrets + +export GITLAB_TOKEN="your-gitlab-api-token" + +export FLUID_CONTROLPLANE_HTTP_BASE="https://your-controlplane.example" +export FLUID_CONTROLPLANE_ORGANIZATION_UUID="your-organization-uuid" +export FLUID_CONTROLPLANE_TOKEN="your-probe-connection-token" diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..bda7a21 --- /dev/null +++ b/go.mod @@ -0,0 +1,35 @@ +module fluid/probes/gitlab + +go 1.25.0 + +require ( + fluid/probes/core v0.1.0 + github.com/go-git/go-git/v5 v5.10.0 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + dario.cat/mergo v1.0.0 // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/ProtonMail/go-crypto v1.1.6 // indirect + github.com/acomagu/bufpipe v1.0.4 // indirect + github.com/cloudflare/circl v1.6.3 // indirect + github.com/cyphar/filepath-securejoin v0.6.1 // indirect + github.com/emirpasic/gods v1.18.1 // indirect + github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect + github.com/go-git/go-billy/v5 v5.6.2 // indirect + github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect + github.com/kevinburke/ssh_config v1.2.0 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/pjbgf/sha1cd v0.6.0 // indirect + github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect + github.com/skeema/knownhosts v1.3.1 // indirect + github.com/xanzy/ssh-agent v0.3.3 // indirect + golang.org/x/crypto v0.50.0 // indirect + golang.org/x/net v0.53.0 // indirect + golang.org/x/sys v0.43.0 // indirect + gopkg.in/warnings.v0 v0.1.2 // indirect +) + +replace fluid/probes/core => ../core diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..5d3c217 --- /dev/null +++ b/go.sum @@ -0,0 +1,108 @@ +dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= +dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= 
+github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw= +github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= +github.com/acomagu/bufpipe v1.0.4 h1:e3H4WUzM3npvo5uv95QuJM3cQspFNtFBzvJ2oNjKIDQ= +github.com/acomagu/bufpipe v1.0.4/go.mod h1:mxdxdup/WdsKVreO5GpW4+M/1CE2sMG4jeGJ2sYmHc4= +github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= +github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/cloudflare/circl v1.6.3 h1:9GPOhQGF9MCYUeXyMYlqTR6a5gTrgR/fBLXvUgtVcg8= +github.com/cloudflare/circl v1.6.3/go.mod h1:2eXP6Qfat4O/Yhh8BznvKnJ+uzEoTQ6jVKJRn81BiS4= +github.com/cyphar/filepath-securejoin v0.6.1 h1:5CeZ1jPXEiYt3+Z6zqprSAgSWiggmpVyciv8syjIpVE= +github.com/cyphar/filepath-securejoin v0.6.1/go.mod h1:A8hd4EnAeyujCJRrICiOWqjS1AX0a9kM5XL+NwKoYSc= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a h1:mATvB/9r/3gvcejNsXKSkQ6lcIaNec2nyfOdlTBR2lU= +github.com/elazarl/goproxy v0.0.0-20230808193330-2592e75ae04a/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM= +github.com/emirpasic/gods v1.18.1 
h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= +github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= +github.com/gliderlabs/ssh v0.3.5 h1:OcaySEmAQJgyYcArR+gGGTHCyE7nvhEMTlYY+Dp8CpY= +github.com/gliderlabs/ssh v0.3.5/go.mod h1:8XB4KraRrX39qHhT6yxPsHedjA08I/uBVwj4xC+/+z4= +github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= +github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= +github.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM= +github.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU= +github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= +github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= +github.com/go-git/go-git/v5 v5.10.0 h1:F0x3xXrAWmhwtzoCokU4IMPcBdncG+HAAqi9FcOOjbQ= +github.com/go-git/go-git/v5 v5.10.0/go.mod h1:1FOZ/pQnqw24ghP2n7cunVl0ON55BsjPYvhWHvZGhoo= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= +github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= +github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= +github.com/klauspost/cpuid/v2 
v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/matryer/is v1.2.0 h1:92UTHpy8CDwaJ08GqLDzhhuixiBUUD1p3AU6PHddz4A= +github.com/matryer/is v1.2.0/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA= +github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= +github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= +github.com/pjbgf/sha1cd v0.6.0 h1:3WJ8Wz8gvDz29quX1OcEmkAlUg9diU4GxJHqs0/XiwU= +github.com/pjbgf/sha1cd v0.6.0/go.mod h1:lhpGlyHLpQZoxMv8HcgXvZEhcGs0PG/vsZnEJ7H0iCM= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= +github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= +github.com/sirupsen/logrus 
v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= +github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= +github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= +golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= +golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= +golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY= +golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= +gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/config/config.go 
b/internal/config/config.go new file mode 100644 index 0000000..c3f44f2 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,215 @@ +package config + +import ( + "fmt" + "log" + "os" + "path/filepath" + "strings" + + "fluid/probes/core" + "fluid/probes/gitlab/internal/models" + + "gopkg.in/yaml.v3" +) + +// Config is the GitLab probe configuration. +type Config struct { + Probe core.ProbeConfig `yaml:"probe"` + GitLab GitLabConfig `yaml:"gitlab"` + Data DataConfig `yaml:"data"` + State core.StateConfig `yaml:"state"` + Controlplane *core.ControlplaneConfig `yaml:"controlplane,omitempty"` + RAGFieldAllowlist core.RAGFieldSet `yaml:"-"` +} + +// GitLabConfig contains GitLab API settings. +type GitLabConfig struct { + URL string `yaml:"url"` + Token string `yaml:"token"` + APIVersion string `yaml:"api_version"` + InstallationType string `yaml:"installation_type,omitempty"` + GroupID string `yaml:"group_id,omitempty"` +} + +// DataConfig extends the core entity list with GitLab-specific collection options. +type DataConfig struct { + Entities []core.EntityConfig `yaml:"entities"` + IncludeSubgroups bool `yaml:"include_subgroups"` + Repositories *RepositoriesConfig `yaml:"repositories,omitempty"` +} + +// RepositoriesConfig configures git clone/sync for configured repos. +type RepositoriesConfig struct { + BaseDir string `yaml:"base_dir"` + SourceViewBaseURL string `yaml:"source_view_base_url"` + Repos []RepoConfig `yaml:"repos"` +} + +// RepoConfig describes a repository to clone or update. +type RepoConfig struct { + URL string `yaml:"url"` + Branch string `yaml:"branch"` + Path string `yaml:"path,omitempty"` + IndexFiles bool `yaml:"index_files"` + MaxFileBytes int `yaml:"max_file_bytes"` + MaxFilesPerRepo int `yaml:"max_files_per_repo"` + SourceViewBaseURL string `yaml:"source_view_base_url"` + Rag *models.RagFacetRules `yaml:"rag,omitempty"` +} + +// LoadConfig loads and validates configuration from a YAML file. 
+func LoadConfig(configPath string) (*Config, error) {
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return nil, fmt.Errorf("read configuration file: %w", err)
+	}
+
+	var config Config
+	if err := yaml.Unmarshal(data, &config); err != nil {
+		return nil, fmt.Errorf("parse configuration: %w", err)
+	}
+
+	// Resolve ${VAR} placeholders first so that Validate checks the actual
+	// token value rather than the placeholder text.
+	if err := config.resolveEnvironmentVariables(); err != nil {
+		return nil, fmt.Errorf("resolve environment variables: %w", err)
+	}
+
+	if err := config.Validate(); err != nil {
+		return nil, fmt.Errorf("invalid configuration: %w", err)
+	}
+
+	// schema.yml is looked up in the same directory as the configuration file;
+	// it is mandatory because the per-field RAG settings are checked against it.
+	schemaPath := filepath.Join(filepath.Dir(configPath), "schema.yml")
+	schemaData, err := os.ReadFile(schemaPath)
+	if err != nil {
+		return nil, fmt.Errorf("schema.yml required (same directory as config) to validate fields.*.rag: %w", err)
+	}
+	allowed, err := core.ParseRAGFieldSetFromSchemaYAML(schemaData)
+	if err != nil {
+		return nil, fmt.Errorf("schema.yml (RAG): %w", err)
+	}
+	// Wrap like every other failure path so the caller can tell which step
+	// rejected the configuration; %w keeps the underlying error inspectable.
+	if err := core.ValidateRAGEntityFields(config.Data.Entities, allowed); err != nil {
+		return nil, fmt.Errorf("validate RAG entity fields: %w", err)
+	}
+	// Keep the parsed field set on the returned configuration.
+	config.RAGFieldAllowlist = allowed
+
+	return &config, nil
+}
+
+// Validate checks required fields and defaults.
+func (c *Config) Validate() error {
+	// Required settings: fail on the first one that is missing.
+	if c.Probe.Name == "" {
+		return fmt.Errorf("probe name is missing")
+	}
+	if c.GitLab.URL == "" {
+		return fmt.Errorf("gitlab url is missing")
+	}
+	if c.GitLab.Token == "" {
+		return fmt.Errorf("gitlab token is missing")
+	}
+	if len(c.Data.Entities) == 0 {
+		return fmt.Errorf("at least one entity must be configured")
+	}
+	if c.State.Dir == "" {
+		return fmt.Errorf("state directory is missing")
+	}
+
+	// Optional settings: fill in defaults in place.
+	if c.State.CleanupInterval <= 0 {
+		c.State.CleanupInterval = 60
+	}
+	// GitLab API request paths embed the version (<url>/api/<version>/...),
+	// so an omitted api_version would produce a malformed "/api//..." path.
+	if strings.TrimSpace(c.GitLab.APIVersion) == "" {
+		c.GitLab.APIVersion = "v4"
+	}
+
+	// Both the repositories and the code_files entities read data.repositories.
+	if c.HasEntity("repositories") || c.HasEntity("code_files") {
+		if c.Data.Repositories == nil {
+			return fmt.Errorf("data.repositories is required when repositories or code_files entities are enabled")
+		}
+		if c.Data.Repositories.BaseDir == "" {
+			c.Data.Repositories.BaseDir = "data/repositories"
+		}
+		for i := range c.Data.Repositories.Repos {
+			// Take the address so the defaults below are written back into the slice.
+			r := &c.Data.Repositories.Repos[i]
+			if r.URL == "" {
+				return fmt.Errorf("repositories.repos[%d].url is required", i)
+			}
+			// File limits only matter for repositories whose files are indexed.
+			if r.IndexFiles {
+				if r.MaxFileBytes <= 0 {
+					r.MaxFileBytes = 512 * 1024
+				}
+				if r.MaxFilesPerRepo <= 0 {
+					r.MaxFilesPerRepo = 2000
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+// HasEntity reports whether name is listed in data.entities.
+func (c *Config) HasEntity(name string) bool { + for _, e := range c.Data.Entities { + if e.Name == name { + return true + } + } + return false +} + +func (c *Config) resolveEnvironmentVariables() error { + if c.GitLab.Token != "" { + if resolved, isEnvVar := resolveEnvVar(c.GitLab.Token); isEnvVar { + if resolved == "" { + return fmt.Errorf("GITLAB_TOKEN environment variable is not defined") + } + c.GitLab.Token = resolved + } + } + + if c.Controlplane != nil { + if c.Controlplane.BaseURL != "" { + if resolved, isEnvVar := resolveEnvVar(c.Controlplane.BaseURL); isEnvVar { + if resolved == "" { + log.Printf("Warning: environment variable not defined for base_url, controlplane disabled") + c.Controlplane = nil + return nil + } + c.Controlplane.BaseURL = resolved + } + } + + if c.Controlplane.Parameters == nil { + log.Printf("Warning: controlplane parameters missing, controlplane disabled") + c.Controlplane = nil + return nil + } + + if c.Controlplane.Parameters.OrganizationUUID != "" { + if resolved, isEnvVar := resolveEnvVar(c.Controlplane.Parameters.OrganizationUUID); isEnvVar && resolved != "" { + c.Controlplane.Parameters.OrganizationUUID = resolved + } + } + if c.Controlplane.Parameters.Token != "" { + if resolved, isEnvVar := resolveEnvVar(c.Controlplane.Parameters.Token); isEnvVar && resolved != "" { + c.Controlplane.Parameters.Token = resolved + } + } + if c.Controlplane.Parameters.OrganizationUUID == "" || c.Controlplane.Parameters.Token == "" { + log.Printf("Warning: controlplane parameters incomplete, controlplane disabled") + c.Controlplane = nil + } + } + + return nil +} + +func resolveEnvVar(value string) (string, bool) { + if !strings.HasPrefix(value, "${") || !strings.HasSuffix(value, "}") { + return value, false + } + envVar := strings.TrimPrefix(strings.TrimSuffix(value, "}"), "${") + return os.Getenv(envVar), true +} + +func (c *Config) GetProbeName() string { return c.Probe.Name } +func (c *Config) GetProbeVersion() string { return 
c.Probe.Version } +func (c *Config) GetStateDir() string { return c.State.Dir } +func (c *Config) GetCleanupInterval() int { return c.State.CleanupInterval } +func (c *Config) GetEntities() []core.EntityConfig { return c.Data.Entities } +func (c *Config) GetControlplane() *core.ControlplaneConfig { return c.Controlplane } diff --git a/internal/config/join_blob.go b/internal/config/join_blob.go new file mode 100644 index 0000000..533d115 --- /dev/null +++ b/internal/config/join_blob.go @@ -0,0 +1,28 @@ +package config + +import ( + "net/url" + "strings" +) + +// JoinBlobDisplayURL appends path segments (percent-encoded per segment) to a GitLab blob base URL. +// base must be the resolved prefix ending before the file path (e.g. …/-/blob/main). +func JoinBlobDisplayURL(base, filePath string) string { + base = strings.TrimRight(strings.TrimSpace(base), "/") + filePath = strings.TrimSpace(filePath) + if base == "" || filePath == "" { + return "" + } + parts := strings.Split(filePath, "/") + var b strings.Builder + b.WriteString(base) + for _, p := range parts { + p = strings.TrimSpace(p) + if p == "" { + continue + } + b.WriteByte('/') + b.WriteString(url.PathEscape(p)) + } + return b.String() +} diff --git a/internal/config/join_blob_test.go b/internal/config/join_blob_test.go new file mode 100644 index 0000000..06f1660 --- /dev/null +++ b/internal/config/join_blob_test.go @@ -0,0 +1,12 @@ +package config + +import "testing" + +func TestJoinBlobDisplayURL(t *testing.T) { + base := "https://gitlab.com/foo/bar/-/blob/main" + got := JoinBlobDisplayURL(base, "infra/aws/main.tf") + want := "https://gitlab.com/foo/bar/-/blob/main/infra/aws/main.tf" + if got != want { + t.Fatalf("got %q want %q", got, want) + } +} diff --git a/internal/config/source_view.go b/internal/config/source_view.go new file mode 100644 index 0000000..d236696 --- /dev/null +++ b/internal/config/source_view.go @@ -0,0 +1,39 @@ +package config + +import ( + "net/url" + "strings" +) + +// 
DefaultSourceViewTemplate is used when neither global nor repo override is set. +// Placeholders: {path} = GitLab project path (group/sub/project), {ref} = branch. +const DefaultSourceViewTemplate = "https://gitlab.com/{path}/-/blob/{ref}" + +// ResolveSourceViewBaseURL expands a template for "View source" links (GitLab blob URL without file path). +// repoOverride wins over globalTemplate when non-empty after trim. +func ResolveSourceViewBaseURL(globalTemplate, repoOverride, repoURL, branch string) string { + tmpl := strings.TrimSpace(repoOverride) + if tmpl == "" { + tmpl = strings.TrimSpace(globalTemplate) + } + if tmpl == "" { + tmpl = DefaultSourceViewTemplate + } + path := gitLabProjectPathFromURL(repoURL) + ref := strings.TrimSpace(branch) + if ref == "" { + ref = "main" + } + s := strings.ReplaceAll(tmpl, "{path}", path) + s = strings.ReplaceAll(s, "{ref}", ref) + return strings.TrimSpace(s) +} + +func gitLabProjectPathFromURL(repoURL string) string { + u, err := url.Parse(strings.TrimSpace(repoURL)) + if err != nil || u.Scheme == "" || u.Host == "" { + return "" + } + p := strings.TrimPrefix(u.Path, "/") + return strings.TrimSuffix(p, ".git") +} diff --git a/internal/config/source_view_test.go b/internal/config/source_view_test.go new file mode 100644 index 0000000..1a830bc --- /dev/null +++ b/internal/config/source_view_test.go @@ -0,0 +1,26 @@ +package config + +import "testing" + +func TestResolveSourceViewBaseURL(t *testing.T) { + u := "https://gitlab.com/acme-corp/platform/documentation" + got := ResolveSourceViewBaseURL("", "", u, "develop") + want := "https://gitlab.com/acme-corp/platform/documentation/-/blob/develop" + if got != want { + t.Fatalf("default template: got %q want %q", got, want) + } + + custom := "https://gitlab.com/{path}/-/tree/{ref}" + got2 := ResolveSourceViewBaseURL(custom, "", u, "main") + want2 := "https://gitlab.com/acme-corp/platform/documentation/-/tree/main" + if got2 != want2 { + t.Fatalf("custom template: got %q want %q", 
got2, want2) + } + + override := "https://example.com/{path}/raw/{ref}" + got3 := ResolveSourceViewBaseURL("ignored", override, u, "x") + want3 := "https://example.com/acme-corp/platform/documentation/raw/x" + if got3 != want3 { + t.Fatalf("repo override: got %q want %q", got3, want3) + } +} diff --git a/internal/gitlab/client.go b/internal/gitlab/client.go new file mode 100644 index 0000000..e5a21ed --- /dev/null +++ b/internal/gitlab/client.go @@ -0,0 +1,215 @@ +package gitlab + +import ( + "encoding/json" + "fmt" + "net/http" + "time" + + "fluid/probes/gitlab/internal/config" + "fluid/probes/gitlab/internal/models" +) + +// Client represents a GitLab client +type Client struct { + baseURL string + token string + apiVersion string + groupID string + includeSubgroups bool + httpClient *http.Client +} + +// NewClient creates a new GitLab client +func NewClient(config *config.GitLabConfig, includeSubgroups bool) *Client { + return &Client{ + baseURL: config.URL, + token: config.Token, + apiVersion: config.APIVersion, + groupID: config.GroupID, + includeSubgroups: includeSubgroups, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// GetUsers retrieves members of the specified group +func (c *Client) GetUsers() ([]models.User, error) { + url := fmt.Sprintf("%s/api/%s/groups/%s/members?per_page=100", c.baseURL, c.apiVersion, c.groupID) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+c.token) + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request error: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error: %d", resp.StatusCode) + } + + var users []models.User + if err := json.NewDecoder(resp.Body).Decode(&users); err != nil { + return nil, fmt.Errorf("error 
decoding users response: %w", err) + } + + // Since we're getting users from a specific group, we can set their group names + // without making additional API calls + for i := range users { + users[i].GroupsNames = []string{c.groupID} + } + + return users, nil +} + +// GetGroups retrieves the specified group +func (c *Client) GetGroups() ([]models.Group, error) { + url := fmt.Sprintf("%s/api/%s/groups/%s", c.baseURL, c.apiVersion, c.groupID) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+c.token) + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request error: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error: %d", resp.StatusCode) + } + + // Decode as a single group object, not an array + var group models.Group + if err := json.NewDecoder(resp.Body).Decode(&group); err != nil { + return nil, fmt.Errorf("error decoding group response: %w", err) + } + + return []models.Group{group}, nil +} + +// GetProjects retrieves projects from the specified group and optionally from all subgroups +func (c *Client) GetProjects() ([]models.Project, error) { + var allProjects []models.Project + + // Get projects from the main group + projects, err := c.getProjectsFromGroup(c.groupID) + if err != nil { + return nil, fmt.Errorf("error getting projects from main group: %w", err) + } + allProjects = append(allProjects, projects...) + + // If recursive mode is enabled, get projects from all subgroups + if c.includeSubgroups { + subgroupProjects, err := c.getProjectsRecursively(c.groupID) + if err != nil { + return nil, fmt.Errorf("error getting projects from subgroups: %w", err) + } + allProjects = append(allProjects, subgroupProjects...) 
+ } + + return allProjects, nil +} + +// getProjectsFromGroup retrieves projects from a specific group +func (c *Client) getProjectsFromGroup(groupID string) ([]models.Project, error) { + url := fmt.Sprintf("%s/api/%s/groups/%s/projects?per_page=100", c.baseURL, c.apiVersion, groupID) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+c.token) + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request error: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error: %d", resp.StatusCode) + } + + var projects []models.Project + if err := json.NewDecoder(resp.Body).Decode(&projects); err != nil { + return nil, fmt.Errorf("error decoding projects response: %w", err) + } + + return projects, nil +} + +// getProjectsRecursively retrieves projects from all subgroups recursively +func (c *Client) getProjectsRecursively(groupID string) ([]models.Project, error) { + var allProjects []models.Project + + // Get subgroups of the current group + subgroups, err := c.getSubgroups(groupID) + if err != nil { + return nil, fmt.Errorf("error getting subgroups for group %s: %w", groupID, err) + } + + // For each subgroup, get its projects and recursively get projects from its subgroups + for _, subgroup := range subgroups { + // Get projects from this subgroup using its ID + projects, err := c.getProjectsFromGroup(fmt.Sprintf("%d", subgroup.ID)) + if err != nil { + return nil, fmt.Errorf("error getting projects from subgroup %s (ID: %d): %w", subgroup.Path, subgroup.ID, err) + } + allProjects = append(allProjects, projects...) 
+ + // Recursively get projects from subgroups of this subgroup using its ID + subProjects, err := c.getProjectsRecursively(fmt.Sprintf("%d", subgroup.ID)) + if err != nil { + return nil, fmt.Errorf("error getting projects from subgroups of %s (ID: %d): %w", subgroup.Path, subgroup.ID, err) + } + allProjects = append(allProjects, subProjects...) + } + + return allProjects, nil +} + +// getSubgroups retrieves subgroups of a specific group +func (c *Client) getSubgroups(groupID string) ([]models.Group, error) { + url := fmt.Sprintf("%s/api/%s/groups/%s/subgroups?per_page=100", c.baseURL, c.apiVersion, groupID) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("error creating request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+c.token) + req.Header.Set("Content-Type", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("HTTP request error: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP error: %d", resp.StatusCode) + } + + var subgroups []models.Group + if err := json.NewDecoder(resp.Body).Decode(&subgroups); err != nil { + return nil, fmt.Errorf("error decoding subgroups response: %w", err) + } + + return subgroups, nil +} diff --git a/internal/manager/manager.go b/internal/manager/manager.go new file mode 100644 index 0000000..2fe9093 --- /dev/null +++ b/internal/manager/manager.go @@ -0,0 +1,36 @@ +package manager + +import ( + "fluid/probes/core/controlplane" + "fluid/probes/core/state" +) + +// Manager delegates state persistence and control plane push to probe-core. +type Manager struct { + coreManager *state.Manager +} + +// NewManager accepts state.ConfigProvider (*config.Config or *core.MergedConfigProvider). 
+func NewManager(cfg state.ConfigProvider) (*Manager, error) { + coreManager, err := state.NewManager(cfg) + if err != nil { + return nil, err + } + return &Manager{coreManager: coreManager}, nil +} + +func (m *Manager) Stop() { + m.coreManager.Stop() +} + +func (m *Manager) SaveEntity(entityName string, data interface{}) error { + return m.coreManager.SaveEntity(entityName, data) +} + +func (m *Manager) GetPushManager() *controlplane.PushManager { + return m.coreManager.GetPushManager() +} + +func (m *Manager) SetConfigCallbacks(getVersion controlplane.ConfigVersionFunc, onChanged controlplane.ConfigChangedCallback) { + m.coreManager.SetConfigCallbacks(getVersion, onChanged) +} diff --git a/internal/models/code_file.go b/internal/models/code_file.go new file mode 100644 index 0000000..f108297 --- /dev/null +++ b/internal/models/code_file.go @@ -0,0 +1,11 @@ +package models + +// CodeFile is a text file under a cloned repository, pushed for RAG indexing on the control plane. +type CodeFile struct { + ID string `json:"id"` + RepoURL string `json:"repo_url"` + FilePath string `json:"file_path"` + Content string `json:"content"` + Title string `json:"title,omitempty"` + SourceURL string `json:"source_url,omitempty"` +} diff --git a/internal/models/gitlab.go b/internal/models/gitlab.go new file mode 100644 index 0000000..b3c7480 --- /dev/null +++ b/internal/models/gitlab.go @@ -0,0 +1,95 @@ +package models + +import "time" + +// User represents a GitLab user +type User struct { + ID int `json:"id"` + Username string `json:"username"` + Email string `json:"email"` + Name string `json:"name"` + State string `json:"state"` + Bio string `json:"bio"` + External bool `json:"external"` + CreatedAt time.Time `json:"created_at"` + LastSignInAt *time.Time `json:"last_sign_in_at"` + ConfirmedAt *time.Time `json:"confirmed_at"` + GroupsNames []string `json:"groups_names,omitempty"` +} + +// Group represents a GitLab group +type Group struct { + ID int `json:"id"` + Name string 
`json:"name"` + Path string `json:"path"` + Description string `json:"description"` + Visibility string `json:"visibility"` + CreatedAt time.Time `json:"created_at"` + ParentID *int `json:"parent_id"` +} + +// Namespace represents a GitLab namespace (group or user) +type Namespace struct { + ID int `json:"id"` + Name string `json:"name"` + Path string `json:"path"` + Kind string `json:"kind"` + FullPath string `json:"full_path"` + ParentID *int `json:"parent_id"` +} + +// Project represents a GitLab project +type Project struct { + ID int `json:"id"` + Name string `json:"name"` + Path string `json:"path"` + Description string `json:"description"` + Visibility string `json:"visibility"` + CreatedAt time.Time `json:"created_at"` + Namespace Namespace `json:"namespace"` + DefaultBranch string `json:"default_branch"` +} + +// State represents the complete GitLab state +type State struct { + Probe string `yaml:"probe"` + Timestamp time.Time `yaml:"timestamp"` + Version string `yaml:"version"` + Data StateData `yaml:"data"` +} + +// StateData contains the state data +type StateData struct { + Entities StateEntities `yaml:"entities"` +} + +// StateEntities contains the state entities +type StateEntities struct { + Users []User `yaml:"users,omitempty"` + Groups []Group `yaml:"groups,omitempty"` + Projects []Project `yaml:"projects,omitempty"` + Repositories []Repository `yaml:"repositories,omitempty"` + CodeFiles []CodeFile `yaml:"code_files,omitempty"` +} + +// Repository represents a cloned repository (entity pushed to control plane) +type Repository struct { + URL string `json:"url"` + Branch string `json:"branch"` + Path string `json:"path"` // local directory name + Name string `json:"name"` // slug (path or derived from URL) + LastSyncAt time.Time `json:"last_sync_at"` + SourceViewBaseURL string `json:"source_view_base_url,omitempty"` // resolved blob URL prefix (no trailing slash before file path) + Rag *RagFacetRules `json:"rag,omitempty"` +} + +// RagFacetRules 
configures path-based facet tags for RAG (merged with file path on the control plane). +type RagFacetRules struct { + Rules []RagRule `yaml:"rules" json:"rules"` +} + +// RagRule is a single glob match and facet key/values to merge. +type RagRule struct { + Match string `yaml:"match" json:"match"` + Facets map[string]string `yaml:"facets" json:"facets"` +} diff --git a/internal/probe/entities/code_files_entity.go b/internal/probe/entities/code_files_entity.go new file mode 100644 index 0000000..d06c3bb --- /dev/null +++ b/internal/probe/entities/code_files_entity.go @@ -0,0 +1,88 @@ +package entities + +import ( + "fmt" + "log" + "path/filepath" + + "fluid/probes/core" + "fluid/probes/core/state" + "fluid/probes/gitlab/internal/config" + "fluid/probes/gitlab/internal/models" + "fluid/probes/gitlab/internal/repositories" +) + +type CodeFilesEntity struct { + cfg state.ConfigProvider +} + +func NewCodeFilesEntity(cfg state.ConfigProvider) *CodeFilesEntity { + return &CodeFilesEntity{cfg: cfg} +} + +func (e *CodeFilesEntity) Name() string { return "code_files" } + +func (e *CodeFilesEntity) Refresh(client core.Client) (interface{}, error) { + _ = client + cfg := getGitLabConfig(e.cfg) + if cfg == nil || cfg.Data.Repositories == nil { + return []models.CodeFile{}, nil + } + reposCfg := cfg.Data.Repositories + if len(reposCfg.Repos) == 0 { + return []models.CodeFile{}, nil + } + + baseDir, err := filepath.Abs(reposCfg.BaseDir) + if err != nil { + return nil, fmt.Errorf("repositories base_dir: %w", err) + } + + if err := repositories.Sync(baseDir, reposCfg.Repos, cfg.GitLab.URL, cfg.GitLab.Token); err != nil { + return nil, fmt.Errorf("sync before code_files scan: %w", err) + } + + all := make([]models.CodeFile, 0) + ragEnabled := codeFilesRAGEnabled(e.cfg) + + for _, r := range reposCfg.Repos { + if !r.IndexFiles { + continue + } + blobBase := config.ResolveSourceViewBaseURL( + reposCfg.SourceViewBaseURL, + r.SourceViewBaseURL, + r.URL, + r.Branch, + ) + files, err := 
repositories.ScanCodeFiles(baseDir, r, blobBase, r.MaxFileBytes, r.MaxFilesPerRepo) + if err != nil { + log.Printf("code_files: scan %s: %v", r.URL, err) + continue + } + if !ragEnabled { + for i := range files { + files[i].Content = "" + } + } + all = append(all, files...) + } + + log.Printf("Indexed %d code_files", len(all)) + return all, nil +} + +func (e *CodeFilesEntity) Save(stateManager core.StateManager, data interface{}) error { + files, ok := data.([]models.CodeFile) + if !ok { + return fmt.Errorf("invalid data type for code_files entity") + } + if len(files) == 0 { + return nil + } + if err := stateManager.SaveEntity(e.Name(), files); err != nil { + return fmt.Errorf("save code_files state: %w", err) + } + log.Printf("Code files state saved (%d files)", len(files)) + return nil +} diff --git a/internal/probe/entities/groups_entity.go b/internal/probe/entities/groups_entity.go new file mode 100644 index 0000000..b68ca41 --- /dev/null +++ b/internal/probe/entities/groups_entity.go @@ -0,0 +1,47 @@ +package entities + +import ( + "fmt" + "log" + + "fluid/probes/core" + "fluid/probes/core/state" + "fluid/probes/gitlab/internal/gitlab" + "fluid/probes/gitlab/internal/models" +) + +type GroupsEntity struct { + cfg state.ConfigProvider +} + +func NewGroupsEntity(cfg state.ConfigProvider) *GroupsEntity { + return &GroupsEntity{cfg: cfg} +} + +func (e *GroupsEntity) Name() string { return "groups" } + +func (e *GroupsEntity) Refresh(client core.Client) (interface{}, error) { + gitlabClient, ok := client.(*gitlab.Client) + if !ok { + return nil, fmt.Errorf("invalid client type for groups entity, expected *gitlab.Client") + } + log.Printf("Fetching GitLab groups...") + groups, err := gitlabClient.GetGroups() + if err != nil { + return nil, fmt.Errorf("fetch groups: %w", err) + } + log.Printf("Fetched %d groups", len(groups)) + return groups, nil +} + +func (e *GroupsEntity) Save(stateManager core.StateManager, data interface{}) error { + groups, ok := 
data.([]models.Group)
+	if !ok {
+		return fmt.Errorf("invalid data type for groups entity")
+	}
+	if err := stateManager.SaveEntity(e.Name(), groups); err != nil {
+		return fmt.Errorf("save groups state: %w", err)
+	}
+	log.Printf("Groups state saved")
+	return nil
+}
diff --git a/internal/probe/entities/helpers.go b/internal/probe/entities/helpers.go
new file mode 100644
index 0000000..a895698
--- /dev/null
+++ b/internal/probe/entities/helpers.go
@@ -0,0 +1,87 @@
+package entities
+
+import (
+	"strings"
+	"time"
+
+	"fluid/probes/core"
+	"fluid/probes/core/state"
+	"fluid/probes/gitlab/internal/config"
+	"fluid/probes/gitlab/internal/models"
+)
+
+// getGitLabConfig returns the *config.Config behind cfg: cfg itself, or the
+// local part of a *core.MergedConfigProvider. It returns nil for anything else.
+func getGitLabConfig(cfg state.ConfigProvider) *config.Config {
+	if c, ok := cfg.(*config.Config); ok {
+		return c
+	}
+	if m, ok := cfg.(*core.MergedConfigProvider); ok {
+		if c, ok := m.Local().(*config.Config); ok {
+			return c
+		}
+	}
+	return nil
+}
+
+// codeFilesRAGEnabled reports whether cfg lists a "code_files" entity for
+// which FieldRAG("content") is true. A nil cfg yields false.
+func codeFilesRAGEnabled(cfg state.ConfigProvider) bool {
+	if cfg == nil {
+		return false
+	}
+	for _, ec := range cfg.GetEntities() {
+		if ec.Name == "code_files" && ec.FieldRAG("content") {
+			return true
+		}
+	}
+	return false
+}
+
+// buildRepositoriesState converts the configured repositories into the models
+// saved for the "repositories" entity. LastSyncAt is the time of this call.
+func buildRepositoriesState(reposCfg *config.RepositoriesConfig) []models.Repository {
+	if reposCfg == nil {
+		return nil
+	}
+	now := time.Now()
+	out := make([]models.Repository, 0, len(reposCfg.Repos))
+	for _, r := range reposCfg.Repos {
+		name := r.Path
+		if name == "" {
+			name = repoNameFromURL(r.URL)
+		}
+		repo := models.Repository{
+			URL:        r.URL,
+			Branch:     r.Branch,
+			Path:       name,
+			Name:       name,
+			LastSyncAt: now,
+		}
+		if r.Rag != nil && len(r.Rag.Rules) > 0 {
+			repo.Rag = r.Rag
+		}
+		base := config.ResolveSourceViewBaseURL(
+			reposCfg.SourceViewBaseURL,
+			r.SourceViewBaseURL,
+			r.URL,
+			r.Branch,
+		)
+		if base != "" {
+			repo.SourceViewBaseURL = base
+		}
+		out = append(out, repo)
+	}
+	return out
+}
+
+// repoNameFromURL returns the last path segment of repoURL without trailing
+// slashes or a ".git" suffix, e.g. "https://host/group/app.git/" yields "app".
+func repoNameFromURL(repoURL string) string {
+	// Trim slashes first: a trailing slash would make the last segment empty.
+	s := strings.TrimSuffix(strings.TrimRight(repoURL, "/"), ".git")
+	if idx := strings.LastIndex(s, "/"); idx >= 0 {
+		return s[idx+1:]
+	}
+	return s
+}
diff --git a/internal/probe/entities/projects_entity.go b/internal/probe/entities/projects_entity.go
new file mode 100644
index 0000000..608d44d
--- /dev/null
+++ b/internal/probe/entities/projects_entity.go
@@ -0,0 +1,53 @@
+package entities
+
+import (
+	"fmt"
+	"log"
+
+	"fluid/probes/core"
+	"fluid/probes/core/state"
+	"fluid/probes/gitlab/internal/gitlab"
+	"fluid/probes/gitlab/internal/models"
+)
+
+// ProjectsEntity is the "projects" entity: it fetches GitLab projects and saves them to the probe state.
+type ProjectsEntity struct {
+	cfg state.ConfigProvider
+}
+
+// NewProjectsEntity returns a projects entity that keeps a reference to cfg.
+func NewProjectsEntity(cfg state.ConfigProvider) *ProjectsEntity {
+	return &ProjectsEntity{cfg: cfg}
+}
+
+// Name returns the entity name, "projects".
+func (e *ProjectsEntity) Name() string { return "projects" }
+
+// Refresh fetches the projects through client, which must be a *gitlab.Client.
+func (e *ProjectsEntity) Refresh(client core.Client) (interface{}, error) {
+	gl, isGitLab := client.(*gitlab.Client)
+	if !isGitLab {
+		return nil, fmt.Errorf("invalid client type for projects entity, expected *gitlab.Client")
+	}
+
+	log.Printf("Fetching GitLab projects...")
+	fetched, fetchErr := gl.GetProjects()
+	if fetchErr != nil {
+		return nil, fmt.Errorf("fetch projects: %w", fetchErr)
+	}
+	log.Printf("Fetched %d projects", len(fetched))
+	return fetched, nil
+}
+
+// Save stores data, which must be a []models.Project, under the entity name.
+func (e *ProjectsEntity) Save(stateManager core.StateManager, data interface{}) error {
+	projects, isProjects := data.([]models.Project)
+	if !isProjects {
+		return fmt.Errorf("invalid data type for projects entity")
+	}
+	if saveErr := stateManager.SaveEntity(e.Name(), projects); saveErr != nil {
+		return fmt.Errorf("save projects state: %w", saveErr)
+	}
+	log.Printf("Projects state saved")
+	return nil
+}
diff --git a/internal/probe/entities/repositories_entity.go b/internal/probe/entities/repositories_entity.go
new file mode 100644
index 0000000..1ca5aac
--- /dev/null
+++ b/internal/probe/entities/repositories_entity.go
@@ -0,0 +1,60 @@
+package entities
+
+import (
+	"fmt"
+	"log"
+	"path/filepath"
+
+	"fluid/probes/core"
+	"fluid/probes/core/state"
+	"fluid/probes/gitlab/internal/models"
+	"fluid/probes/gitlab/internal/repositories"
+)
+
+type 
RepositoriesEntity struct {
+	cfg state.ConfigProvider
+}
+
+func NewRepositoriesEntity(cfg state.ConfigProvider) *RepositoriesEntity {
+	return &RepositoriesEntity{cfg: cfg}
+}
+
+func (e *RepositoriesEntity) Name() string { return "repositories" }
+
+func (e *RepositoriesEntity) Refresh(client core.Client) (interface{}, error) {
+	_ = client
+	gitlabCfg := getGitLabConfig(e.cfg)
+	if gitlabCfg == nil || gitlabCfg.Data.Repositories == nil {
+		return []models.Repository{}, nil
+	}
+	settings := gitlabCfg.Data.Repositories
+	if len(settings.Repos) == 0 {
+		return []models.Repository{}, nil
+	}
+
+	root, err := filepath.Abs(settings.BaseDir)
+	if err != nil {
+		return nil, fmt.Errorf("repositories base_dir: %w", err)
+	}
+
+	log.Printf("Syncing %d Git repositories under %s", len(settings.Repos), root)
+	if syncErr := repositories.Sync(root, settings.Repos, gitlabCfg.GitLab.URL, gitlabCfg.GitLab.Token); syncErr != nil {
+		return nil, fmt.Errorf("sync repositories: %w", syncErr)
+	}
+
+	entries := buildRepositoriesState(settings)
+	log.Printf("Built repositories entity (%d entries)", len(entries))
+	return entries, nil
+}
+
+func (e *RepositoriesEntity) Save(stateManager core.StateManager, data interface{}) error {
+	entries, isRepos := data.([]models.Repository)
+	if !isRepos {
+		return fmt.Errorf("invalid data type for repositories entity")
+	}
+	if saveErr := stateManager.SaveEntity(e.Name(), entries); saveErr != nil {
+		return fmt.Errorf("save repositories state: %w", saveErr)
+	}
+	log.Printf("Repositories state saved")
+	return nil
+}
diff --git a/internal/probe/entities/users_entity.go b/internal/probe/entities/users_entity.go
new file mode 100644
index 0000000..57431cf
--- /dev/null
+++ b/internal/probe/entities/users_entity.go
@@ -0,0 +1,53 @@
+package entities
+
+import (
+	"fmt"
+	"log"
+
+	"fluid/probes/core"
+	"fluid/probes/core/state"
+	"fluid/probes/gitlab/internal/gitlab"
+	"fluid/probes/gitlab/internal/models"
+)
+
+// UsersEntity is the "users" entity: it fetches GitLab users and saves them to the probe state.
+type UsersEntity struct {
+	cfg state.ConfigProvider
+}
+
+// NewUsersEntity returns a users entity that keeps a reference to cfg.
+func NewUsersEntity(cfg state.ConfigProvider) *UsersEntity {
+	return &UsersEntity{cfg: cfg}
+}
+
+// Name returns the entity name, "users".
+func (e *UsersEntity) Name() string { return "users" }
+
+// Refresh fetches the users through client, which must be a *gitlab.Client.
+func (e *UsersEntity) Refresh(client core.Client) (interface{}, error) {
+	gl, isGitLab := client.(*gitlab.Client)
+	if !isGitLab {
+		return nil, fmt.Errorf("invalid client type for users entity, expected *gitlab.Client")
+	}
+
+	log.Printf("Fetching GitLab users...")
+	fetched, fetchErr := gl.GetUsers()
+	if fetchErr != nil {
+		return nil, fmt.Errorf("fetch users: %w", fetchErr)
+	}
+	log.Printf("Fetched %d users", len(fetched))
+	return fetched, nil
+}
+
+// Save stores data, which must be a []models.User, under the entity name.
+func (e *UsersEntity) Save(stateManager core.StateManager, data interface{}) error {
+	users, isUsers := data.([]models.User)
+	if !isUsers {
+		return fmt.Errorf("invalid data type for users entity")
+	}
+	if saveErr := stateManager.SaveEntity(e.Name(), users); saveErr != nil {
+		return fmt.Errorf("save users state: %w", saveErr)
+	}
+	log.Printf("Users state saved")
+	return nil
+}
diff --git a/internal/probe/probe.go b/internal/probe/probe.go
new file mode 100644
index 0000000..27c8fa6
--- /dev/null
+++ b/internal/probe/probe.go
@@ -0,0 +1,113 @@
+package probe
+
+import (
+	"fmt"
+	"log"
+
+	"fluid/probes/core"
+	"fluid/probes/core/state"
+	"fluid/probes/gitlab/internal/config"
+	"fluid/probes/gitlab/internal/gitlab"
+	"fluid/probes/gitlab/internal/manager"
+	"fluid/probes/gitlab/internal/probe/entities"
+)
+
+// Probe is the GitLab probe: the embedded core.Probe plus the GitLab
+// configuration and API client it was built from.
+type Probe struct {
+	*core.Probe
+	config *config.Config
+	client *gitlab.Client
+}
+
+// getGitLabConfig returns the *config.Config behind cfg: cfg itself, or the
+// local part of a *core.MergedConfigProvider. It returns nil for anything else.
+func getGitLabConfig(cfg state.ConfigProvider) *config.Config {
+	switch c := cfg.(type) {
+	case *config.Config:
+		return c
+	case *core.MergedConfigProvider:
+		if local, ok := c.Local().(*config.Config); ok {
+			return local
+		}
+	}
+	return nil
+}
+
+// NewProbe accepts state.ConfigProvider (*config.Config or *core.MergedConfigProvider).
+func NewProbe(cfg state.ConfigProvider) (*Probe, error) {
+	gitlabCfg := getGitLabConfig(cfg)
+	if gitlabCfg == nil {
+		return nil, fmt.Errorf("NewProbe requires *config.Config or *core.MergedConfigProvider with *config.Config as local")
+	}
+
+	client := gitlab.NewClient(&gitlabCfg.GitLab, gitlabCfg.Data.IncludeSubgroups)
+	stateManager, err := manager.NewManager(cfg)
+	if err != nil {
+		return nil, err
+	}
+
+	coreProbe := core.NewProbe(cfg, client, stateManager)
+	a := &Probe{
+		Probe:  coreProbe,
+		config: gitlabCfg,
+		client: client,
+	}
+
+	registerEntities(coreProbe, cfg)
+
+	if merged, ok := cfg.(*core.MergedConfigProvider); ok && stateManager.GetPushManager() != nil {
+		stateManager.SetConfigCallbacks(
+			merged.GetConfigVersion,
+			func(runtimeJSON []byte, configVersion string) {
+				runtime, version, err := core.ParseRuntimeConfig(runtimeJSON)
+				if err != nil {
+					log.Printf("Parse runtime config on reload: %v", err)
+					return
+				}
+				if version != "" {
+					configVersion = version
+				}
+				if err := merged.SetRemote(runtime, configVersion); err != nil {
+					log.Printf("Set remote config on reload: %v", err)
+					return
+				}
+				if err := coreProbe.ReloadConfig(); err != nil {
+					log.Printf("ReloadConfig failed: %v", err)
+				}
+			},
+		)
+	}
+
+	return a, nil
+}
+
+func registerEntities(coreProbe *core.Probe, cfg state.ConfigProvider) {
+	factories := map[string]func() core.Entity{
+		"users":        func() core.Entity { return entities.NewUsersEntity(cfg) },
+		"groups":       func() core.Entity { return entities.NewGroupsEntity(cfg) },
+		"projects":     func() core.Entity { return entities.NewProjectsEntity(cfg) },
+		"repositories": func() core.Entity { return entities.NewRepositoriesEntity(cfg) },
+		"code_files":   func() core.Entity { return entities.NewCodeFilesEntity(cfg) },
+	}
+
+	for _, ec := range cfg.GetEntities() {
+		factory, ok := factories[ec.Name]
+		if !ok {
+			log.Printf("Warning: unknown entity %q, skipping registration", ec.Name)
+			continue
+		}
+		coreProbe.RegisterEntity(factory())
+	}
+}
+
+func (a *Probe) Start() error {
+	return a.Probe.Start()
+}
+
+func (a *Probe) GetStatus() map[string]interface{} {
+	status := a.Probe.GetStatus()
+	status["gitlab_url"] = a.config.GitLab.URL
+	status["include_subgroups"] = a.config.Data.IncludeSubgroups
+	return status
+}
diff --git a/internal/repositories/codefiles.go b/internal/repositories/codefiles.go
new file mode 100644
index 0000000..2ad7b8d
--- /dev/null
+++ b/internal/repositories/codefiles.go
@@ -0,0 +1,157 @@
+package repositories
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"fluid/probes/gitlab/internal/config"
+	"fluid/probes/gitlab/internal/models"
+)
+
+// defaultMaxFileBytes is the per-file size limit applied when the caller passes
+// a non-positive maxBytes.
+const defaultMaxFileBytes = 512 * 1024
+
+// errWalkStop aborts the directory walk once the file limit is reached.
+var errWalkStop = errors.New("walk: file limit reached")
+
+// textSuffixes lists the lower-case file extensions that are indexed.
+var textSuffixes = map[string]struct{}{
+	".tf": {}, ".tfvars": {}, ".md": {}, ".markdown": {},
+	".yml": {}, ".yaml": {}, ".json": {}, ".sh": {}, ".bash": {},
+	".ex": {}, ".exs": {}, ".go": {}, ".rs": {},
+	".js": {}, ".ts": {}, ".tsx": {}, ".jsx": {},
+	".py": {}, ".toml": {}, ".hcl": {},
+}
+
+// skipDirNames lists directory names that are never descended into.
+var skipDirNames = map[string]struct{}{
+	".git": {}, "node_modules": {}, ".terraform": {}, "vendor": {}, "dist": {}, "build": {},
+}
+
+// ScanCodeFiles walks a cloned repo and returns text files for RAG (bounded by size/count).
+// blobBaseURL is the resolved GitLab blob prefix (…/-/blob/{ref}); if empty, SourceURL is left empty.
+//
+// A non-positive maxBytes selects defaultMaxFileBytes; a non-positive maxFiles
+// yields no files. Only regular files are read, so a symlink committed to the
+// repository cannot pull in content from outside the checkout. Files that
+// cannot be read are skipped; directory errors abort the walk.
+func ScanCodeFiles(baseDir string, repo config.RepoConfig, blobBaseURL string, maxBytes, maxFiles int) ([]models.CodeFile, error) {
+	if maxBytes <= 0 {
+		maxBytes = defaultMaxFileBytes
+	}
+
+	dirName := repo.Path
+	if dirName == "" {
+		dirName = repoNameFromURL(repo.URL)
+	}
+	root := filepath.Join(baseDir, dirName)
+
+	info, err := os.Stat(root)
+	if err != nil {
+		return nil, fmt.Errorf("repo directory %s: %w", root, err)
+	}
+	if !info.IsDir() {
+		// err is nil here, so it must not be wrapped with %w.
+		return nil, fmt.Errorf("repo directory %s: not a directory", root)
+	}
+
+	repoURL := strings.TrimSpace(repo.URL)
+	out := make([]models.CodeFile, 0, 64)
+
+	err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			// Never skip root itself: a repository may be named "vendor" or "build".
+			if _, skip := skipDirNames[d.Name()]; skip && path != root {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		if len(out) >= maxFiles {
+			return errWalkStop
+		}
+		if !d.Type().IsRegular() {
+			return nil
+		}
+		rel, err := filepath.Rel(root, path)
+		if err != nil {
+			return err
+		}
+		rel = filepath.ToSlash(rel)
+		if !wantFile(rel, maxBytes) {
+			return nil
+		}
+		// Check the size before reading so oversized files are never loaded.
+		if fi, err := d.Info(); err != nil || fi.Size() > int64(maxBytes) {
+			return nil
+		}
+		data, err := os.ReadFile(path)
+		if err != nil {
+			return nil // best effort: an unreadable file is skipped
+		}
+		if len(data) > maxBytes || !looksText(data) {
+			return nil
+		}
+		sourceURL := ""
+		if blobBaseURL != "" {
+			sourceURL = config.JoinBlobDisplayURL(blobBaseURL, rel)
+		}
+		out = append(out, models.CodeFile{
+			ID:        codeFileID(repoURL, rel),
+			RepoURL:   repoURL,
+			FilePath:  rel,
+			Content:   string(data),
+			Title:     filepath.Base(rel),
+			SourceURL: sourceURL,
+		})
+		return nil
+	})
+
+	if err != nil && !errors.Is(err, errWalkStop) {
+		return nil, err
+	}
+	return out, nil
+}
+
+// wantFile reports whether rel has one of the indexed extensions in textSuffixes.
+// maxBytes is unused; the size limit is enforced by ScanCodeFiles.
+func wantFile(rel string, maxBytes int) bool {
+	ext := strings.ToLower(filepath.Ext(rel))
+	if ext == "" {
+		return false
+	}
+	_, ok := textSuffixes[ext]
+	return ok
+}
+
+// looksText reports whether b is non-empty and has no NUL byte in its first 8 KiB.
+func looksText(b []byte) bool {
+	if len(b) == 0 {
+		return false
+	}
+	// Reject obvious binary: NUL in first 8k
+	n := len(b)
+	if n > 8192 {
+		n = 8192
+	}
+	for i := 0; i < n; i++ {
+		if b[i] == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+func 
codeFileID(repoURL, filePath string) string {
+	h := sha256.Sum256([]byte(repoURL + "\x00" + filePath))
+	return hex.EncodeToString(h[:])
+}
diff --git a/internal/repositories/sync.go b/internal/repositories/sync.go
new file mode 100644
index 0000000..05f10cf
--- /dev/null
+++ b/internal/repositories/sync.go
@@ -0,0 +1,209 @@
+package repositories
+
+import (
+	"errors"
+	"fmt"
+	"log"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
+	githttp "github.com/go-git/go-git/v5/plumbing/transport/http"
+
+	"fluid/probes/gitlab/internal/config"
+)
+
+// Sync runs clone or pull for each configured repository under baseDir.
+// When token is set, repositories hosted under gitlabBaseURL are accessed with
+// HTTP basic auth (user "oauth2"). The token is not written into the clone URL
+// and is not passed along for repositories on other hosts.
+// If token is empty, URLs are used as-is (public repos or SSH).
+//
+// A failure on one repository is logged and the remaining ones are still
+// processed; the returned error only reports a failure to create baseDir.
+func Sync(baseDir string, repos []config.RepoConfig, gitlabBaseURL, token string) error {
+	if err := os.MkdirAll(baseDir, 0o755); err != nil {
+		return fmt.Errorf("create base dir %s: %w", baseDir, err)
+	}
+
+	auth := gitHTTPAuth(token)
+
+	for _, r := range repos {
+		if err := syncOne(baseDir, r, gitlabBaseURL, token, auth); err != nil {
+			log.Printf("repositories: sync %s: %v", r.URL, err)
+			// continue with other repos
+		}
+	}
+
+	return nil
+}
+
+// gitHTTPAuth returns basic-auth credentials for token, or nil if token is empty.
+func gitHTTPAuth(token string) *githttp.BasicAuth {
+	if token == "" {
+		return nil
+	}
+	return &githttp.BasicAuth{Username: "oauth2", Password: token}
+}
+
+// syncOne clones r into baseDir, or updates the checkout when one already exists.
+// auth is dropped unless r.URL belongs to the GitLab instance at gitlabBaseURL.
+func syncOne(baseDir string, r config.RepoConfig, gitlabBaseURL, token string, auth *githttp.BasicAuth) error {
+	if token == "" || !isGitLabURL(r.URL, gitlabBaseURL) {
+		// Never hand the GitLab token to another host.
+		auth = nil
+	}
+
+	dirName := r.Path
+	if dirName == "" {
+		dirName = repoNameFromURL(r.URL)
+	}
+	if dirName == "" {
+		// An empty name would make dest equal to baseDir itself.
+		return errors.New("cannot derive a directory name from the repository URL")
+	}
+	dest := filepath.Join(baseDir, dirName)
+
+	if _, err := os.Stat(filepath.Join(dest, ".git")); err == nil {
+		return pull(dest, r.Branch, auth)
+	}
+	// The plain URL is cloned: credentials travel in auth, so the token does
+	// not end up in the origin URL stored in .git/config.
+	return clone(r.URL, dest, r.Branch, auth)
+}
+
+// isGitLabURL reports whether repoURL points at the GitLab instance configured
+// as gitlabBaseURL: same scheme and host, and a path below the base path.
+// An empty or unparsable URL on either side yields false.
+func isGitLabURL(repoURL, gitlabBaseURL string) bool {
+	base, err := url.Parse(strings.TrimSpace(gitlabBaseURL))
+	if err != nil || base.Host == "" {
+		return false
+	}
+	repo, err := url.Parse(strings.TrimSpace(repoURL))
+	if err != nil {
+		return false
+	}
+	if !strings.EqualFold(repo.Scheme, base.Scheme) || !strings.EqualFold(repo.Host, base.Host) {
+		return false
+	}
+	basePath := strings.TrimSuffix(base.Path, "/")
+	return basePath == "" || repo.Path == basePath || strings.HasPrefix(repo.Path, basePath+"/")
+}
+
+// redactURLForLog returns raw without its user info, or "[url]" if raw cannot
+// be parsed or is empty.
+func redactURLForLog(raw string) string {
+	u, err := url.Parse(raw)
+	if err != nil {
+		return "[url]"
+	}
+	u.User = nil
+	out := u.String()
+	if out == "" {
+		return "[url]"
+	}
+	return out
+}
+
+// repoNameFromURL returns the last path segment of repoURL without trailing
+// slashes or a ".git" suffix, e.g. "https://host/group/app.git/" yields "app".
+func repoNameFromURL(repoURL string) string {
+	// Trim slashes first: a trailing slash would make the last segment empty.
+	s := strings.TrimSuffix(strings.TrimRight(repoURL, "/"), ".git")
+	if idx := strings.LastIndex(s, "/"); idx >= 0 {
+		return s[idx+1:]
+	}
+	return s
+}
+
+// clone makes a shallow (depth 1), single-branch clone of cloneURL into dest.
+// A non-empty branch selects the branch to clone.
+func clone(cloneURL, dest, branch string, auth *githttp.BasicAuth) error {
+	opts := &git.CloneOptions{
+		URL:          cloneURL,
+		SingleBranch: true,
+		Depth:        1,
+	}
+	if auth != nil {
+		// Assign only a non-nil pointer: a nil *BasicAuth stored in the Auth
+		// interface would compare unequal to nil inside go-git.
+		opts.Auth = auth
+	}
+	if branch != "" {
+		opts.ReferenceName = plumbing.NewBranchReferenceName(branch)
+	}
+
+	if _, err := git.PlainClone(dest, false, opts); err != nil {
+		return fmt.Errorf("clone repository: %w", err)
+	}
+	log.Printf("repositories: cloned %s -> %s", redactURLForLog(cloneURL), dest)
+	return nil
+}
+
+// pull fetches origin and hard-resets the worktree at dest to the commit
+// chosen by resolvePullCommitHash. Local changes in the checkout are discarded.
+func pull(dest, branch string, auth *githttp.BasicAuth) error {
+	repo, err := git.PlainOpen(dest)
+	if err != nil {
+		return fmt.Errorf("open repository: %w", err)
+	}
+
+	remote, err := repo.Remote("origin")
+	if err != nil {
+		return fmt.Errorf("remote origin: %w", err)
+	}
+	fetchOpts := &git.FetchOptions{}
+	if auth != nil {
+		fetchOpts.Auth = auth // see clone: avoid a typed-nil AuthMethod
+	}
+	// Fetch reports "nothing new" as git.NoErrAlreadyUpToDate; that is not a failure.
+	if err := remote.Fetch(fetchOpts); err != nil && !errors.Is(err, git.NoErrAlreadyUpToDate) {
+		return fmt.Errorf("fetch origin: %w", err)
+	}
+
+	hash, err := resolvePullCommitHash(repo, branch)
+	if err != nil {
+		return err
+	}
+
+	worktree, err := repo.Worktree()
+	if err != nil {
+		return fmt.Errorf("worktree: %w", err)
+	}
+	if err := worktree.Reset(&git.ResetOptions{Mode: git.HardReset, Commit: hash}); err != nil {
+		return fmt.Errorf("reset worktree: %w", err)
+	}
+
+	log.Printf("repositories: updated %s", dest)
+	return nil
+}
+
+// resolvePullCommitHash returns the commit the checkout should be reset to:
+// origin/<branch> when branch is set, otherwise the remote-tracking ref of the
+// branch HEAD points to, falling back to HEAD itself when there is none.
+func resolvePullCommitHash(repo *git.Repository, branch string) (plumbing.Hash, error) {
+	if branch != "" {
+		ref, err := repo.Reference(plumbing.NewRemoteReferenceName("origin", branch), true)
+		if err != nil {
+			return plumbing.ZeroHash, fmt.Errorf("resolve origin/%s: %w", branch, err)
+		}
+		return ref.Hash(), nil
+	}
+	head, err := repo.Head()
+	if err != nil {
+		return plumbing.ZeroHash, fmt.Errorf("resolve HEAD: %w", err)
+	}
+	// Local HEAD does not move on fetch; follow its remote-tracking branch so an
+	// unpinned repository is actually updated.
+	if head.Name().IsBranch() {
+		tracking := plumbing.NewRemoteReferenceName("origin", head.Name().Short())
+		if ref, err := repo.Reference(tracking, true); err == nil {
+			return ref.Hash(), nil
+		}
+	}
+	return head.Hash(), nil
+}
diff --git a/scripts/install-git-hooks.sh b/scripts/install-git-hooks.sh
new file mode 100755
index 0000000..ecfa954
--- /dev/null
+++ b/scripts/install-git-hooks.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env sh
+# Point Git at versioned hooks under .githooks (per-clone config, not committed).
+set -eu
+
+cd "$(dirname "$0")/.." || exit 1
+
+git config core.hooksPath .githooks
+chmod +x .githooks/pre-commit 2>/dev/null || true
+echo "Configured this clone: core.hooksPath=.githooks (pre-commit runs gofmt -l ., same as CI)."