diff --git a/.semaphore/semaphore.yml b/.semaphore/semaphore.yml index 5cade0f21..a96132f93 100644 --- a/.semaphore/semaphore.yml +++ b/.semaphore/semaphore.yml @@ -2634,7 +2634,7 @@ blocks: - name: "MCP Server: Provision Prod Image" dependencies: [] run: - when: "change_in('/mcp_server', {pipeline_file: 'ignore', default_branch: 'main'})" + when: "change_in('/mcp_server', {pipeline_file: 'ignore', default_branch: 'main'}) or change_in('/docs/docs', {default_branch: 'main'}) or change_in('/docs/versioned_docs', {default_branch: 'main'})" task: env_vars: - name: DOCKER_BUILDKIT @@ -2653,7 +2653,7 @@ blocks: - name: "MCP Server: Deployment Preconditions" dependencies: ["MCP Server: Provision Prod Image"] run: - when: "change_in('/mcp_server', {pipeline_file: 'ignore', default_branch: 'main'})" + when: "change_in('/mcp_server', {pipeline_file: 'ignore', default_branch: 'main'}) or change_in('/docs/docs', {default_branch: 'main'}) or change_in('/docs/versioned_docs', {default_branch: 'main'})" task: env_vars: - name: DOCKER_BUILDKIT @@ -2669,7 +2669,7 @@ blocks: jobs: - name: "Check code" commands: - - make check.go.code + - make check.go.code CHECK_CODE_OPTS='--config-options "-exclude-generated"' - name: "Check dependencies" commands: - make check.go.deps @@ -2680,7 +2680,7 @@ blocks: - name: "MCP Server: QA" dependencies: ["MCP Server: Provision Prod Image"] run: - when: "change_in('/mcp_server', {pipeline_file: 'ignore', default_branch: 'main'})" + when: "change_in('/mcp_server', {pipeline_file: 'ignore', default_branch: 'main'}) or change_in('/docs/docs', {default_branch: 'main'}) or change_in('/docs/versioned_docs', {default_branch: 'main'})" task: env_vars: - name: DOCKER_BUILDKIT diff --git a/mcp_server/.gitignore b/mcp_server/.gitignore new file mode 100644 index 000000000..3852a654e --- /dev/null +++ b/mcp_server/.gitignore @@ -0,0 +1,6 @@ +# Binaries (in root directory only) +/mcp_server +/indexer + +# Generated search index +/index/ diff --git a/mcp_server/Dockerfile b/mcp_server/Dockerfile index 03dbf1304..46ced42c8 100644 --- a/mcp_server/Dockerfile +++ b/mcp_server/Dockerfile @@ -11,14 +11,14 @@ ENV CGO_ENABLED=0 \ GOOS=linux \ GOARCH=amd64 -COPY go.mod go.sum ./ +COPY mcp_server/go.mod mcp_server/go.sum ./ RUN --mount=type=cache,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ go mod download -COPY cmd ./cmd -COPY pkg ./pkg -COPY test ./test +COPY mcp_server/cmd ./cmd +COPY mcp_server/pkg ./pkg +COPY mcp_server/test ./test FROM base AS dev RUN --mount=type=cache,target=/root/.cache/go-build \ @@ -27,14 +27,24 @@ CMD ["sh", "-c", "while sleep 1000; do :; done"] FROM base AS builder RUN --mount=type=cache,target=/root/.cache/go-build \ - go build -o /tmp/mcp_server ./cmd/mcp_server + go build -o /tmp/mcp_server ./cmd/mcp_server && \ + go build -o /tmp/indexer ./cmd/indexer + +# Build the documentation search index +# Docs are expected at /docs in this stage (copied from build context or external source) +FROM builder AS indexer +COPY docs /docs +RUN /tmp/indexer -docs=/docs -output=/tmp/docssearch_index FROM alpine:${ALPINE_VERSION} AS runner RUN adduser -D -H -s /sbin/nologin appuser -USER appuser WORKDIR /app COPY --from=builder /tmp/mcp_server /usr/local/bin/mcp_server +COPY --from=indexer --chown=appuser:appuser /tmp/docssearch_index /app/docssearch/index +COPY --from=indexer --chown=appuser:appuser /docs /app/docssearch/docs + +USER appuser EXPOSE 3001 diff --git a/mcp_server/Makefile b/mcp_server/Makefile index 80aac66bb..d0a574afa 100644 --- 
a/mcp_server/Makefile +++ b/mcp_server/Makefile @@ -3,13 +3,14 @@ include ../Makefile APP_NAME=mcp_server APP_ENV=prod CHECK_CODE_OPTS?=--config-options "-exclude-generated" +DOCKER_BUILD_PATH=.. INTERNAL_API_BRANCH?=master TMP_REPO_DIR ?= /tmp/internal_api INTERNAL_API_MODULES?=include/internal_api/status,include/internal_api/response_status,plumber_w_f.workflow,plumber.pipeline,server_farm.job,loghub,loghub2,user,repository_integrator,rbac,organization,projecthub,feature,artifacthub PROTOC_IMAGE?=golang:1.24-alpine -.PHONY: tidy test test.setup lint pb.gen dev.run +.PHONY: tidy test test.setup lint pb.gen dev.run index tidy: go mod tidy @@ -46,3 +47,10 @@ dev.run: echo "air not found, falling back to go run"; \ MCP_USE_STUBS=true go run ./cmd/mcp_server -http :3001; \ fi + +# Build the documentation search index +# Usage: make index DOCS_ROOT=../docs INDEX_PATH=./index +DOCS_ROOT ?= ../docs +INDEX_PATH ?= ./index +index: + go run ./cmd/indexer -docs=$(DOCS_ROOT) -output=$(INDEX_PATH) diff --git a/mcp_server/cmd/indexer/main.go b/mcp_server/cmd/indexer/main.go new file mode 100644 index 000000000..56b1ba0f9 --- /dev/null +++ b/mcp_server/cmd/indexer/main.go @@ -0,0 +1,58 @@ +package main + +import ( + "flag" + "fmt" + "log" + "os" + + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch/indexer" + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch/loader" +) + +func main() { + docsRoot := flag.String("docs", "../docs", "Path to docs directory") + outputPath := flag.String("output", "./index", "Path for output index") + flag.Parse() + + // Remove existing index if present + if _, err := os.Stat(*outputPath); err == nil { + log.Printf("Removing existing index at %s", *outputPath) + if err := os.RemoveAll(*outputPath); err != nil { + log.Fatalf("Failed to remove existing index: %v", err) + } + } + + log.Printf("Loading docs from %s", *docsRoot) + l := loader.New(*docsRoot) + docs, err := l.LoadAll() + if err != nil { + log.Fatalf("Failed to load docs: %v", err) + } + log.Printf("Loaded %d documents", len(docs)) + + // Print some stats + versionCounts := make(map[string]int) + docTypeCounts := make(map[string]int) + for _, d := range docs { + versionCounts[d.Version]++ + docTypeCounts[d.DocType]++ + } + fmt.Println("\nDocuments by version:") + for v, c := range versionCounts { + fmt.Printf(" %s: %d\n", v, c) + } + fmt.Println("\nDocuments by type:") + for t, c := range docTypeCounts { + fmt.Printf(" %s: %d\n", t, c) + } + fmt.Println() + + log.Printf("Building index at %s", *outputPath) + idx := indexer.New(*outputPath) + if err := idx.BuildIndex(docs); err != nil { + log.Fatalf("Failed to build index: %v", err) + } + + log.Printf("Index built successfully with %d documents", len(docs)) +} diff --git a/mcp_server/cmd/mcp_server/main.go b/mcp_server/cmd/mcp_server/main.go index 5b26de483..073be4aaf 100644 --- a/mcp_server/cmd/mcp_server/main.go +++ b/mcp_server/cmd/mcp_server/main.go @@ -17,10 +17,12 @@ import ( "github.com/mark3labs/mcp-go/server" "github.com/sirupsen/logrus" + "github.com/semaphoreio/semaphore/mcp_server/pkg/config" "github.com/semaphoreio/semaphore/mcp_server/pkg/internalapi" "github.com/semaphoreio/semaphore/mcp_server/pkg/logging" "github.com/semaphoreio/semaphore/mcp_server/pkg/prompts" "github.com/semaphoreio/semaphore/mcp_server/pkg/tools" + "github.com/semaphoreio/semaphore/mcp_server/pkg/tools/docs" "github.com/semaphoreio/semaphore/mcp_server/pkg/tools/jobs" "github.com/semaphoreio/semaphore/mcp_server/pkg/tools/organizations" 
"github.com/semaphoreio/semaphore/mcp_server/pkg/tools/pipelines" @@ -77,6 +79,8 @@ func main() { bootstrapLog := logging.ForComponent("bootstrap") if strings.EqualFold(os.Getenv("MCP_USE_STUBS"), "true") { bootstrapLog.Info("using stubbed internal API clients (MCP_USE_STUBS=true)") + config.SetDevMode(true) + bootstrapLog.Info("dev mode enabled - skipping X-Semaphore-User-ID validation") provider = support.New() } else { cfg, err := internalapi.LoadConfig() @@ -116,6 +120,7 @@ func main() { // Register prompts for agent configuration guidance prompts.Register(srv) + docs.Register(srv) mux := http.NewServeMux() streamable := server.NewStreamableHTTPServer( diff --git a/mcp_server/go.mod b/mcp_server/go.mod index 6f4f876aa..c8dab55a2 100644 --- a/mcp_server/go.mod +++ b/mcp_server/go.mod @@ -4,6 +4,7 @@ go 1.25 require ( github.com/allegro/bigcache/v3 v3.1.0 + github.com/blevesearch/bleve/v2 v2.5.5 github.com/eko/gocache/lib/v4 v4.2.2 github.com/eko/gocache/store/bigcache/v4 v4.2.3 github.com/golang/protobuf v1.5.4 @@ -12,7 +13,7 @@ require ( github.com/mark3labs/mcp-go v0.41.1 github.com/renderedtext/go-watchman v0.0.0-20221222100224-451a6f3c8d92 github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 google.golang.org/genproto/googleapis/rpc v0.0.0-20251014184007-4626949a642f google.golang.org/grpc v1.75.1 google.golang.org/protobuf v1.36.10 @@ -22,13 +23,37 @@ require ( require gopkg.in/alexcesaro/statsd.v2 v2.0.0 // indirect require ( + github.com/RoaringBitmap/roaring/v2 v2.4.5 // indirect github.com/bahlo/generic-list-go v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/bits-and-blooms/bitset v1.22.0 // indirect + github.com/blevesearch/bleve_index_api v1.2.11 // indirect + github.com/blevesearch/geo v0.2.4 // indirect + github.com/blevesearch/go-faiss v1.0.26 // indirect + github.com/blevesearch/go-porterstemmer v1.0.3 // indirect + github.com/blevesearch/gtreap v0.1.1 // indirect + github.com/blevesearch/mmap-go v1.0.4 // indirect + github.com/blevesearch/scorch_segment_api/v2 v2.3.13 // indirect + github.com/blevesearch/segment v0.9.1 // indirect + github.com/blevesearch/snowballstem v0.9.0 // indirect + github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect + github.com/blevesearch/vellum v1.1.0 // indirect + github.com/blevesearch/zapx/v11 v11.4.2 // indirect + github.com/blevesearch/zapx/v12 v12.4.2 // indirect + github.com/blevesearch/zapx/v13 v13.4.2 // indirect + github.com/blevesearch/zapx/v14 v14.4.2 // indirect + github.com/blevesearch/zapx/v15 v15.4.2 // indirect + github.com/blevesearch/zapx/v16 v16.2.7 // indirect github.com/buger/jsonparser v1.1.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/invopop/jsonschema v0.13.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_golang v1.19.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect @@ -37,6 +62,7 @@ require ( github.com/spf13/cast v1.7.1 // indirect github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + go.etcd.io/bbolt v1.4.0 // indirect 
golang.org/x/exp v0.0.0-20240416160154-fe59bbe5cc7f // indirect golang.org/x/net v0.41.0 // indirect golang.org/x/sync v0.15.0 // indirect diff --git a/mcp_server/go.sum b/mcp_server/go.sum index 32258902c..1c11ad5e5 100644 --- a/mcp_server/go.sum +++ b/mcp_server/go.sum @@ -1,9 +1,50 @@ +github.com/RoaringBitmap/roaring/v2 v2.4.5 h1:uGrrMreGjvAtTBobc0g5IrW1D5ldxDQYe2JW2gggRdg= +github.com/RoaringBitmap/roaring/v2 v2.4.5/go.mod h1:FiJcsfkGje/nZBZgCu0ZxCPOKD/hVXDS2dXi7/eUFE0= github.com/allegro/bigcache/v3 v3.1.0 h1:H2Vp8VOvxcrB91o86fUSVJFqeuz8kpyyB02eH3bSzwk= github.com/allegro/bigcache/v3 v3.1.0/go.mod h1:aPyh7jEvrog9zAwx5N7+JUQX5dZTSGpxF1LAR4dr35I= github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk= github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4= +github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/blevesearch/bleve/v2 v2.5.5 h1:lzC89QUCco+y1qBnJxGqm4AbtsdsnlUvq0kXok8n3C8= +github.com/blevesearch/bleve/v2 v2.5.5/go.mod h1:t5WoESS5TDteTdnjhhvpA1BpLYErOBX2IQViTMLK7wo= +github.com/blevesearch/bleve_index_api v1.2.11 h1:bXQ54kVuwP8hdrXUSOnvTQfgK0KI1+f9A0ITJT8tX1s= +github.com/blevesearch/bleve_index_api v1.2.11/go.mod h1:rKQDl4u51uwafZxFrPD1R7xFOwKnzZW7s/LSeK4lgo0= +github.com/blevesearch/geo v0.2.4 h1:ECIGQhw+QALCZaDcogRTNSJYQXRtC8/m8IKiA706cqk= +github.com/blevesearch/geo v0.2.4/go.mod h1:K56Q33AzXt2YExVHGObtmRSFYZKYGv0JEN5mdacJJR8= +github.com/blevesearch/go-faiss v1.0.26 h1:4dRLolFgjPyjkaXwff4NfbZFdE/dfywbzDqporeQvXI= +github.com/blevesearch/go-faiss v1.0.26/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk= +github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo= +github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M= +github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y= +github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk= +github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= +github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= +github.com/blevesearch/scorch_segment_api/v2 v2.3.13 h1:ZPjv/4VwWvHJZKeMSgScCapOy8+DdmsmRyLmSB88UoY= +github.com/blevesearch/scorch_segment_api/v2 v2.3.13/go.mod h1:ENk2LClTehOuMS8XzN3UxBEErYmtwkE7MAArFTXs9Vc= +github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU= +github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw= +github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s= +github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs= +github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A= +github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ= +github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w= +github.com/blevesearch/vellum v1.1.0/go.mod 
h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y= +github.com/blevesearch/zapx/v11 v11.4.2 h1:l46SV+b0gFN+Rw3wUI1YdMWdSAVhskYuvxlcgpQFljs= +github.com/blevesearch/zapx/v11 v11.4.2/go.mod h1:4gdeyy9oGa/lLa6D34R9daXNUvfMPZqUYjPwiLmekwc= +github.com/blevesearch/zapx/v12 v12.4.2 h1:fzRbhllQmEMUuAQ7zBuMvKRlcPA5ESTgWlDEoB9uQNE= +github.com/blevesearch/zapx/v12 v12.4.2/go.mod h1:TdFmr7afSz1hFh/SIBCCZvcLfzYvievIH6aEISCte58= +github.com/blevesearch/zapx/v13 v13.4.2 h1:46PIZCO/ZuKZYgxI8Y7lOJqX3Irkc3N8W82QTK3MVks= +github.com/blevesearch/zapx/v13 v13.4.2/go.mod h1:knK8z2NdQHlb5ot/uj8wuvOq5PhDGjNYQQy0QDnopZk= +github.com/blevesearch/zapx/v14 v14.4.2 h1:2SGHakVKd+TrtEqpfeq8X+So5PShQ5nW6GNxT7fWYz0= +github.com/blevesearch/zapx/v14 v14.4.2/go.mod h1:rz0XNb/OZSMjNorufDGSpFpjoFKhXmppH9Hi7a877D8= +github.com/blevesearch/zapx/v15 v15.4.2 h1:sWxpDE0QQOTjyxYbAVjt3+0ieu8NCE0fDRaFxEsp31k= +github.com/blevesearch/zapx/v15 v15.4.2/go.mod h1:1pssev/59FsuWcgSnTa0OeEpOzmhtmr/0/11H0Z8+Nw= +github.com/blevesearch/zapx/v16 v16.2.7 h1:xcgFRa7f/tQXOwApVq7JWgPYSlzyUMmkuYa54tMDuR0= +github.com/blevesearch/zapx/v16 v16.2.7/go.mod h1:murSoCJPCk25MqURrcJaBQ1RekuqSCSfMjXH4rHyA14= github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs= github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -23,13 +64,18 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E= github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -38,6 +84,13 @@ github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0 github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mark3labs/mcp-go v0.41.1 h1:w78eWfiQam2i8ICL7AL0WFiq7KHNJQ6UB53ZVtH4KGA= github.com/mark3labs/mcp-go v0.41.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= @@ -57,13 +110,16 @@ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y= github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc= github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk= +go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= @@ -84,6 +140,7 @@ golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= @@ -103,5 +160,6 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 
v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/mcp_server/pkg/config/config.go b/mcp_server/pkg/config/config.go new file mode 100644 index 000000000..d41671e70 --- /dev/null +++ b/mcp_server/pkg/config/config.go @@ -0,0 +1,19 @@ +// Package config provides global configuration for the MCP server. +package config + +var ( + // devMode skips authentication checks when true (for local development) + devMode = false + // DevUserID is the user ID used when running in dev mode + DevUserID = "00000000-0000-0000-0000-000000000000" +) + +// SetDevMode enables or disables dev mode (skips auth checks) +func SetDevMode(enabled bool) { + devMode = enabled +} + +// IsDevMode returns true if dev mode is enabled +func IsDevMode() bool { + return devMode +} diff --git a/mcp_server/pkg/docssearch/client.go b/mcp_server/pkg/docssearch/client.go new file mode 100644 index 000000000..5a83e70ed --- /dev/null +++ b/mcp_server/pkg/docssearch/client.go @@ -0,0 +1,251 @@ +// Package docssearch provides a documentation search client using Bleve. +// This package can be imported directly into other Go services for in-process +// search without requiring gRPC. +package docssearch + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch/search" +) + +// SearchResult represents a single search result. +type SearchResult struct { + Path string + Title string + Snippet string + Score float64 + Version string + DocType string // "saas" or "versioned" + Anchor string + Description string +} + +// Document represents a full document. +type Document struct { + Path string + Title string + Body string + Version string + DocType string // "saas" or "versioned" +} + +// SearchOption configures a search request. +type SearchOption func(*search.SearchRequest) + +// WithVersion filters results to a specific version. +func WithVersion(version string) SearchOption { + return func(r *search.SearchRequest) { + r.Version = version + } +} + +// WithDocType filters results to SaaS or versioned docs. +func WithDocType(docType string) SearchOption { + return func(r *search.SearchRequest) { + switch docType { + case "saas": + r.DocType = search.DocTypeSaaS + case "versioned": + r.DocType = search.DocTypeVersioned + } + } +} + +// WithLimit sets the maximum number of results. +func WithLimit(limit int) SearchOption { + return func(r *search.SearchRequest) { + // Clamp to valid int32 range to prevent overflow + if limit < 0 { + limit = 0 + } else if limit > 1000 { + limit = 1000 // Reasonable max for search results + } + r.Limit = int32(limit) // #nosec G115 -- bounds checked above + } +} + +// WithOffset sets the result offset for pagination. +func WithOffset(offset int) SearchOption { + return func(r *search.SearchRequest) { + // Clamp to valid int32 range to prevent overflow + if offset < 0 { + offset = 0 + } else if offset > 10000 { + offset = 10000 // Reasonable max for pagination offset + } + r.Offset = int32(offset) // #nosec G115 -- bounds checked above + } +} + +// WithPathPrefix filters results to a path subtree. 
+func WithPathPrefix(prefix string) SearchOption { + return func(r *search.SearchRequest) { + r.PathPrefix = prefix + } +} + +// Client provides documentation search capabilities. +type Client struct { + searcher *search.Searcher + docsRoot string +} + +// New creates a new search client. +// indexPath is the path to the Bleve index directory. +// docsRoot is the path to the docs directory for GetDocument. +func New(indexPath, docsRoot string) (*Client, error) { + s, err := search.Open(indexPath) + if err != nil { + return nil, fmt.Errorf("open index: %w", err) + } + return &Client{ + searcher: s, + docsRoot: docsRoot, + }, nil +} + +// Close releases resources. +func (c *Client) Close() error { + if c.searcher != nil { + return c.searcher.Close() + } + return nil +} + +// Search executes a search query with optional filters. +func (c *Client) Search(ctx context.Context, query string, opts ...SearchOption) ([]SearchResult, error) { + req := &search.SearchRequest{ + Query: query, + Limit: 10, + } + for _, opt := range opts { + opt(req) + } + + results, err := c.searcher.Search(req) + if err != nil { + return nil, err + } + + var out []SearchResult + for _, r := range results { + out = append(out, SearchResult{ + Path: r.Path, + Title: r.Title, + Snippet: r.Snippet, + Score: r.Score, + Version: r.Version, + DocType: r.DocType, + Anchor: r.Anchor, + Description: r.Description, + }) + } + return out, nil +} + +// GetDocument retrieves a document by path. +func (c *Client) GetDocument(ctx context.Context, path string) (*Document, error) { + // Validate path doesn't escape docs root + cleanPath := filepath.Clean(path) + if filepath.IsAbs(cleanPath) || (len(cleanPath) > 0 && cleanPath[0] == '.') { + return nil, fmt.Errorf("invalid path") + } + + fullPath := filepath.Join(c.docsRoot, cleanPath) + + // Ensure the resolved path is within docsRoot (prevent path traversal) + absDocsRoot, err := filepath.Abs(c.docsRoot) + if err != nil { + return nil, fmt.Errorf("invalid docs root: %w", err) + } + absFullPath, err := filepath.Abs(fullPath) + if err != nil { + return nil, fmt.Errorf("invalid path: %w", err) + } + if !strings.HasPrefix(absFullPath, absDocsRoot+string(filepath.Separator)) && absFullPath != absDocsRoot { + return nil, fmt.Errorf("invalid path: outside docs root") + } + + content, err := os.ReadFile(absFullPath) // #nosec G304 -- path validated above + if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("document not found: %s", path) + } + return nil, fmt.Errorf("read document: %w", err) + } + + version, docType := inferVersionAndType(cleanPath) + title := extractTitle(string(content)) + + return &Document{ + Path: cleanPath, + Title: title, + Body: string(content), + Version: version, + DocType: docType, + }, nil +} + +func inferVersionAndType(path string) (string, string) { + // Check for SaaS docs (docs/...) + if len(path) >= 5 && path[:5] == "docs/" { + return "saas", "saas" + } + + // Check for versioned docs (versioned_docs/version-XX/...) 
+ const prefix = "versioned_docs/version-" + if len(path) >= len(prefix) && path[:len(prefix)] == prefix { + rest := path[len(prefix):] + slashIdx := -1 + for i, c := range rest { + if c == '/' { + slashIdx = i + break + } + } + if slashIdx > 0 { + return rest[:slashIdx], "versioned" + } + return rest, "versioned" + } + + return "", "" +} + +func extractTitle(content string) string { + lines := splitLines(content) + inFrontMatter := false + for _, line := range lines { + if line == "---" { + inFrontMatter = !inFrontMatter + continue + } + if inFrontMatter { + continue + } + if len(line) > 2 && line[0] == '#' && line[1] == ' ' { + return line[2:] + } + } + return "" +} + +func splitLines(s string) []string { + var lines []string + start := 0 + for i := 0; i < len(s); i++ { + if s[i] == '\n' { + lines = append(lines, s[start:i]) + start = i + 1 + } + } + if start < len(s) { + lines = append(lines, s[start:]) + } + return lines +} diff --git a/mcp_server/pkg/docssearch/indexer/indexer.go b/mcp_server/pkg/docssearch/indexer/indexer.go new file mode 100644 index 000000000..28c96e6e9 --- /dev/null +++ b/mcp_server/pkg/docssearch/indexer/indexer.go @@ -0,0 +1,101 @@ +package indexer + +import ( + "fmt" + "strings" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/mapping" + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch/loader" +) + +// IndexedDocument is the structure stored in Bleve index. +type IndexedDocument struct { + Path string `json:"path"` + Title string `json:"title"` + Body string `json:"body"` + Description string `json:"description"` + Version string `json:"version"` + DocType string `json:"doc_type"` + Headings string `json:"headings"` // joined with newlines for full-text search +} + +// Indexer creates and manages Bleve indexes. +type Indexer struct { + indexPath string +} + +// New creates a new Indexer that will write to the given path. +func New(indexPath string) *Indexer { + return &Indexer{indexPath: indexPath} +} + +// BuildIndex creates a new Bleve index from the given documents. 
+func (i *Indexer) BuildIndex(docs []loader.Document) error { + m := newIndexMapping() + + index, err := bleve.New(i.indexPath, m) + if err != nil { + return fmt.Errorf("create index: %w", err) + } + defer index.Close() + + batch := index.NewBatch() + for _, doc := range docs { + idoc := IndexedDocument{ + Path: doc.Path, + Title: doc.Title, + Body: doc.Body, + Description: doc.Description, + Version: doc.Version, + DocType: doc.DocType, + Headings: strings.Join(doc.Headings, "\n"), + } + if err := batch.Index(doc.Path, idoc); err != nil { + return fmt.Errorf("index document %s: %w", doc.Path, err) + } + } + + if err := index.Batch(batch); err != nil { + return fmt.Errorf("batch index: %w", err) + } + + return nil +} + +func newIndexMapping() mapping.IndexMapping { + // Create field mappings + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = true + textFieldMapping.IncludeTermVectors = true + + keywordFieldMapping := bleve.NewKeywordFieldMapping() + keywordFieldMapping.Store = true + + // Title with boost + titleFieldMapping := bleve.NewTextFieldMapping() + titleFieldMapping.Store = true + titleFieldMapping.IncludeTermVectors = true + + // Description with boost + descFieldMapping := bleve.NewTextFieldMapping() + descFieldMapping.Store = true + descFieldMapping.IncludeTermVectors = true + + // Create document mapping + docMapping := bleve.NewDocumentMapping() + docMapping.AddFieldMappingsAt("path", keywordFieldMapping) + docMapping.AddFieldMappingsAt("title", titleFieldMapping) + docMapping.AddFieldMappingsAt("body", textFieldMapping) + docMapping.AddFieldMappingsAt("description", descFieldMapping) + docMapping.AddFieldMappingsAt("version", keywordFieldMapping) + docMapping.AddFieldMappingsAt("doc_type", keywordFieldMapping) + docMapping.AddFieldMappingsAt("headings", textFieldMapping) + + // Create index mapping + indexMapping := bleve.NewIndexMapping() + indexMapping.DefaultMapping = docMapping + indexMapping.DefaultAnalyzer = "en" + + return indexMapping +} diff --git a/mcp_server/pkg/docssearch/loader/loader.go b/mcp_server/pkg/docssearch/loader/loader.go new file mode 100644 index 000000000..0d412863a --- /dev/null +++ b/mcp_server/pkg/docssearch/loader/loader.go @@ -0,0 +1,240 @@ +package loader + +import ( + "bufio" + "os" + "path/filepath" + "regexp" + "strings" + + "gopkg.in/yaml.v3" +) + +// Document represents a parsed documentation file. +type Document struct { + Path string // relative path from docs root + Title string // extracted from markdown h1 + Body string // markdown content (front matter stripped) + Description string // from front matter + Version string // "saas", "CE", "EE", etc. + DocType string // "saas" or "versioned" + Headings []string // h2/h3 headings for better search +} + +// FrontMatter represents YAML front matter in docs. +type FrontMatter struct { + Description string `yaml:"description"` + Title string `yaml:"title"` +} + +var ( + headingH1Re = regexp.MustCompile(`^#\s+(.+)$`) + headingH2Re = regexp.MustCompile(`^##\s+(.+)$`) + headingH3Re = regexp.MustCompile(`^###\s+(.+)$`) + // Matches self-closing JSX/MDX components: + jsxSelfClosingRe = regexp.MustCompile(`<[A-Z][a-zA-Z]*[^>]*/\s*>`) + // Matches opening JSX tag: + jsxOpenTagRe = regexp.MustCompile(`<([A-Z][a-zA-Z]*)(?:\s[^>]*)?>`) +) + +// Loader walks the docs directory and parses markdown files. +type Loader struct { + docsRoot string +} + +// New creates a new Loader for the given docs root directory. 
+func New(docsRoot string) *Loader { + return &Loader{docsRoot: docsRoot} +} + +// LoadAll walks the docs directory and returns all parsed documents. +func (l *Loader) LoadAll() ([]Document, error) { + var docs []Document + + // Load SaaS docs from docs/docs/ + saasPath := filepath.Join(l.docsRoot, "docs") + saasDocs, err := l.loadFromPath(saasPath, "saas", "saas") + if err != nil { + return nil, err + } + docs = append(docs, saasDocs...) + + // Load versioned docs from docs/versioned_docs/version-* + versionedPath := filepath.Join(l.docsRoot, "versioned_docs") + entries, err := os.ReadDir(versionedPath) + if err != nil { + // versioned_docs might not exist + if os.IsNotExist(err) { + return docs, nil + } + return nil, err + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + name := entry.Name() + if !strings.HasPrefix(name, "version-") { + continue + } + version := strings.TrimPrefix(name, "version-") + vPath := filepath.Join(versionedPath, name) + vDocs, err := l.loadFromPath(vPath, version, "versioned") + if err != nil { + return nil, err + } + docs = append(docs, vDocs...) + } + + return docs, nil +} + +func (l *Loader) loadFromPath(basePath, version, docType string) ([]Document, error) { + var docs []Document + + err := filepath.Walk(basePath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + if !strings.HasSuffix(path, ".md") { + return nil + } + + doc, err := l.parseFile(path, version, docType) + if err != nil { + // Log and skip malformed files + return nil + } + if doc != nil { + docs = append(docs, *doc) + } + return nil + }) + + return docs, err +} + +func (l *Loader) parseFile(path, version, docType string) (*Document, error) { + content, err := os.ReadFile(path) // #nosec G304 -- path from filepath.Walk constrained to docsRoot + if err != nil { + return nil, err + } + + relPath, err := filepath.Rel(l.docsRoot, path) + if err != nil { + relPath = path + } + + body, frontMatter := parseFrontMatter(string(content)) + title := extractTitle(body, frontMatter) + headings := extractHeadings(body) + + // Strip JSX/MDX components for cleaner search snippets + cleanBody := stripJSXComponents(body) + + return &Document{ + Path: relPath, + Title: title, + Body: cleanBody, + Description: frontMatter.Description, + Version: version, + DocType: docType, + Headings: headings, + }, nil +} + +func parseFrontMatter(content string) (body string, fm FrontMatter) { + lines := strings.Split(content, "\n") + if len(lines) < 3 || strings.TrimSpace(lines[0]) != "---" { + return content, fm + } + + endIdx := -1 + for i := 1; i < len(lines); i++ { + if strings.TrimSpace(lines[i]) == "---" { + endIdx = i + break + } + } + + if endIdx == -1 { + return content, fm + } + + fmContent := strings.Join(lines[1:endIdx], "\n") + _ = yaml.Unmarshal([]byte(fmContent), &fm) + + body = strings.Join(lines[endIdx+1:], "\n") + return strings.TrimSpace(body), fm +} + +func extractTitle(body string, fm FrontMatter) string { + // Try front matter title first + if fm.Title != "" { + return fm.Title + } + + // Extract from first h1 + scanner := bufio.NewScanner(strings.NewReader(body)) + for scanner.Scan() { + line := scanner.Text() + if matches := headingH1Re.FindStringSubmatch(line); len(matches) > 1 { + return strings.TrimSpace(matches[1]) + } + } + + return "" +} + +func extractHeadings(body string) []string { + var headings []string + scanner := bufio.NewScanner(strings.NewReader(body)) + for scanner.Scan() { + line 
:= scanner.Text()
+		if matches := headingH2Re.FindStringSubmatch(line); len(matches) > 1 {
+			headings = append(headings, strings.TrimSpace(matches[1]))
+		} else if matches := headingH3Re.FindStringSubmatch(line); len(matches) > 1 {
+			headings = append(headings, strings.TrimSpace(matches[1]))
+		}
+	}
+	return headings
+}
+
+// stripJSXComponents removes JSX/MDX components from markdown content.
+// This cleans up self-closing and paired JSX/MDX component tags.
+func stripJSXComponents(body string) string {
+	// Remove self-closing components first
+	body = jsxSelfClosingRe.ReplaceAllString(body, "")
+
+	// Remove paired components: ...
+	// Go's regexp doesn't support backreferences, so we find and remove manually
+	for i := 0; i < 10; i++ {
+		match := jsxOpenTagRe.FindStringSubmatchIndex(body)
+		if match == nil {
+			break
+		}
+
+		tagName := body[match[2]:match[3]]
+		openStart := match[0]
+		openEnd := match[1]
+
+		// Find matching closing tag
+		closeTag := "</" + tagName + ">"
+		closeIdx := strings.Index(body[openEnd:], closeTag)
+		if closeIdx == -1 {
+			// No closing tag found, just remove the opening tag
+			body = body[:openStart] + body[openEnd:]
+			continue
+		}
+
+		// Remove from open tag to end of close tag
+		closeEnd := openEnd + closeIdx + len(closeTag)
+		body = body[:openStart] + body[closeEnd:]
+	}
+
+	return body
+}
diff --git a/mcp_server/pkg/docssearch/loader/loader_test.go b/mcp_server/pkg/docssearch/loader/loader_test.go
new file mode 100644
index 000000000..a0af07d14
--- /dev/null
+++ b/mcp_server/pkg/docssearch/loader/loader_test.go
@@ -0,0 +1,312 @@
+package loader
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestParseFrontMatter(t *testing.T) {
+	tests := []struct {
+		name            string
+		content         string
+		wantBody        string
+		wantDescription string
+		wantTitle       string
+	}{
+		{
+			name: "with front matter",
+			content: `---
+description: This is a test doc
+title: Test Title
+---
+
+# Heading
+
+Body content here.`,
+			wantBody:        "# Heading\n\nBody content here.",
+			wantDescription: "This is a test doc",
+			wantTitle:       "Test Title",
+		},
+		{
+			name: "no front matter",
+			content: `# Just a Heading
+
+Some content.`,
+			wantBody:        "# Just a Heading\n\nSome content.",
+			wantDescription: "",
+			wantTitle:       "",
+		},
+		{
+			name: "empty front matter",
+			content: `---
+---
+
+# Empty FM
+
+Content.`,
+			wantBody:        "# Empty FM\n\nContent.",
+			wantDescription: "",
+			wantTitle:       "",
+		},
+		{
+			name:            "unclosed front matter",
+			content:         "---\ndescription: test\n# No closing",
+			wantBody:        "---\ndescription: test\n# No closing",
+			wantDescription: "",
+			wantTitle:       "",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			body, fm := parseFrontMatter(tt.content)
+			if body != tt.wantBody {
+				t.Errorf("body = %q, want %q", body, tt.wantBody)
+			}
+			if fm.Description != tt.wantDescription {
+				t.Errorf("description = %q, want %q", fm.Description, tt.wantDescription)
+			}
+			if fm.Title != tt.wantTitle {
+				t.Errorf("title = %q, want %q", fm.Title, tt.wantTitle)
+			}
+		})
+	}
+}
+
+func TestExtractTitle(t *testing.T) {
+	tests := []struct {
+		name      string
+		body      string
+		fmTitle   string
+		wantTitle string
+	}{
+		{
+			name:      "title from h1",
+			body:      "# My Document\n\nSome content",
+			fmTitle:   "",
+			wantTitle: "My Document",
+		},
+		{
+			name:      "title from front matter takes precedence",
+			body:      "# Body Title\n\nContent",
+			fmTitle:   "FM Title",
+			wantTitle: "FM Title",
+		},
+		{
+			name:      "no title",
+			body:      "Just some text without heading",
+			fmTitle:   "",
+			wantTitle: "",
+		},
+		{
+			name: "h2 not picked as title",
+			body: "## Second 
Level\n\nContent", + fmTitle: "", + wantTitle: "", + }, + { + name: "title with extra spaces", + body: "# Spaced Title \n\nContent", + fmTitle: "", + wantTitle: "Spaced Title", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fm := FrontMatter{Title: tt.fmTitle} + got := extractTitle(tt.body, fm) + if got != tt.wantTitle { + t.Errorf("extractTitle() = %q, want %q", got, tt.wantTitle) + } + }) + } +} + +func TestExtractHeadings(t *testing.T) { + body := `# Main Title + +## First Section + +Some content. + +### Subsection + +More content. + +## Second Section + +Final content. + +#### Too deep - not included +` + headings := extractHeadings(body) + + want := []string{"First Section", "Subsection", "Second Section"} + if len(headings) != len(want) { + t.Fatalf("got %d headings, want %d", len(headings), len(want)) + } + for i, h := range headings { + if h != want[i] { + t.Errorf("heading[%d] = %q, want %q", i, h, want[i]) + } + } +} + +func TestStripJSXComponents(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "self-closing component", + input: `# Title + + + +Some content.`, + want: `# Title + + + +Some content.`, + }, + { + name: "paired component", + input: `# Title + + +content inside tabs + + +After tabs.`, + want: `# Title + + + +After tabs.`, + }, + { + name: "nested components", + input: ` + +Content A + + +Content B + +`, + want: ``, + }, + { + name: "no components", + input: "Just plain markdown\n\nWith paragraphs.", + want: "Just plain markdown\n\nWith paragraphs.", + }, + { + name: "mixed content", + input: "Start\n\nMiddle\ninner\nEnd", + want: "Start\n\nMiddle\n\nEnd", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := stripJSXComponents(tt.input) + if got != tt.want { + t.Errorf("stripJSXComponents() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestLoaderLoadAll(t *testing.T) { + // Create temp directory structure + tmpDir, err := os.MkdirTemp("", "docssearch-test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + + // Create docs/docs structure (SaaS) + saasDir := filepath.Join(tmpDir, "docs") + if err := os.MkdirAll(saasDir, 0755); err != nil { + t.Fatal(err) + } + + saasDoc := `--- +description: SaaS doc description +--- + +# SaaS Document + +Content here. +` + if err := os.WriteFile(filepath.Join(saasDir, "test.md"), []byte(saasDoc), 0644); err != nil { + t.Fatal(err) + } + + // Create versioned_docs structure + versionedDir := filepath.Join(tmpDir, "versioned_docs", "version-CE") + if err := os.MkdirAll(versionedDir, 0755); err != nil { + t.Fatal(err) + } + + versionedDoc := `--- +description: CE doc description +--- + +# CE Document + +CE content. 
+` + if err := os.WriteFile(filepath.Join(versionedDir, "ce-doc.md"), []byte(versionedDoc), 0644); err != nil { + t.Fatal(err) + } + + // Load docs + loader := New(tmpDir) + docs, err := loader.LoadAll() + if err != nil { + t.Fatalf("LoadAll() error = %v", err) + } + + if len(docs) != 2 { + t.Fatalf("got %d docs, want 2", len(docs)) + } + + // Check SaaS doc + var saas, ce *Document + for i := range docs { + if docs[i].DocType == "saas" { + saas = &docs[i] + } else if docs[i].Version == "CE" { + ce = &docs[i] + } + } + + if saas == nil { + t.Fatal("SaaS doc not found") + } + if saas.Title != "SaaS Document" { + t.Errorf("SaaS title = %q, want %q", saas.Title, "SaaS Document") + } + if saas.Description != "SaaS doc description" { + t.Errorf("SaaS description = %q, want %q", saas.Description, "SaaS doc description") + } + if saas.Version != "saas" { + t.Errorf("SaaS version = %q, want %q", saas.Version, "saas") + } + + if ce == nil { + t.Fatal("CE doc not found") + } + if ce.Title != "CE Document" { + t.Errorf("CE title = %q, want %q", ce.Title, "CE Document") + } + if ce.DocType != "versioned" { + t.Errorf("CE docType = %q, want %q", ce.DocType, "versioned") + } +} diff --git a/mcp_server/pkg/docssearch/search/search.go b/mcp_server/pkg/docssearch/search/search.go new file mode 100644 index 000000000..f503c42a7 --- /dev/null +++ b/mcp_server/pkg/docssearch/search/search.go @@ -0,0 +1,222 @@ +package search + +import ( + "fmt" + "html" + "strings" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search" + "github.com/blevesearch/bleve/v2/search/query" +) + +// DocType distinguishes SaaS docs from versioned self-hosted docs. +type DocType int + +const ( + DocTypeUnspecified DocType = iota + DocTypeSaaS + DocTypeVersioned +) + +// SearchRequest represents a search query with optional filters. +type SearchRequest struct { + Query string + Version string // optional version filter (e.g. "saas", "CE", "EE") + DocType DocType // optional filter + Limit int32 // default from config + Offset int32 + PathPrefix string // optional filter to a subtree +} + +// Result represents a search result. +type Result struct { + Path string + Title string + Snippet string + Score float64 + Version string + DocType string + Anchor string + Description string +} + +// Searcher provides full-text search over the docs index. +type Searcher struct { + index bleve.Index +} + +// Open opens an existing Bleve index at the given path in read-only mode. +// Read-only mode is required for deployments where the index is baked into +// a read-only container filesystem (e.g., Kubernetes). +func Open(indexPath string) (*Searcher, error) { + index, err := bleve.OpenUsing(indexPath, map[string]interface{}{ + "read_only": true, + }) + if err != nil { + return nil, fmt.Errorf("open index: %w", err) + } + return &Searcher{index: index}, nil +} + +// Close closes the index. +func (s *Searcher) Close() error { + return s.index.Close() +} + +// Search executes a search query with optional filters. 
+func (s *Searcher) Search(req *SearchRequest) ([]Result, error) { + q := buildQuery(req) + + limit := int(req.Limit) + if limit <= 0 { + limit = 10 + } + offset := int(req.Offset) + if offset < 0 { + offset = 0 + } + + searchReq := bleve.NewSearchRequestOptions(q, limit, offset, false) + searchReq.Fields = []string{"path", "title", "description", "version", "doc_type"} + searchReq.Highlight = bleve.NewHighlightWithStyle("html") + searchReq.Highlight.AddField("body") + searchReq.Highlight.AddField("title") + searchReq.Highlight.AddField("description") + + searchResult, err := s.index.Search(searchReq) + if err != nil { + return nil, fmt.Errorf("search: %w", err) + } + + var results []Result + for _, hit := range searchResult.Hits { + r := Result{ + Path: getString(hit.Fields, "path"), + Title: getString(hit.Fields, "title"), + Version: getString(hit.Fields, "version"), + DocType: getString(hit.Fields, "doc_type"), + Score: hit.Score, + } + + // Build snippet from highlights or description + r.Snippet = buildSnippet(hit, getString(hit.Fields, "description")) + + results = append(results, r) + } + + return results, nil +} + +func buildQuery(req *SearchRequest) query.Query { + // Create text query across searchable fields + titleQuery := query.NewMatchQuery(req.Query) + descQuery := query.NewMatchQuery(req.Query) + headingsQuery := query.NewMatchQuery(req.Query) + bodyQuery := query.NewMatchQuery(req.Query) + + textQuery := bleve.NewDisjunctionQuery( + boostQuery(titleQuery, "title", 3.0), + boostQuery(descQuery, "description", 2.0), + boostQuery(headingsQuery, "headings", 1.5), + bodyQuery, + ) + + // If no filters, return text query directly + if req.Version == "" && req.DocType == DocTypeUnspecified && req.PathPrefix == "" { + return textQuery + } + + // Build boolean query with filters + boolQuery := bleve.NewBooleanQuery() + boolQuery.AddMust(textQuery) + + if req.Version != "" { + versionQuery := bleve.NewTermQuery(req.Version) + versionQuery.SetField("version") + boolQuery.AddMust(versionQuery) + } + + if req.DocType != DocTypeUnspecified { + docTypeStr := docTypeToString(req.DocType) + docTypeQuery := bleve.NewTermQuery(docTypeStr) + docTypeQuery.SetField("doc_type") + boolQuery.AddMust(docTypeQuery) + } + + if req.PathPrefix != "" { + prefixQuery := bleve.NewPrefixQuery(req.PathPrefix) + prefixQuery.SetField("path") + boolQuery.AddMust(prefixQuery) + } + + return boolQuery +} + +func boostQuery(q *query.MatchQuery, field string, boost float64) query.Query { + q.SetField(field) + q.SetBoost(boost) + return q +} + +func docTypeToString(dt DocType) string { + switch dt { + case DocTypeSaaS: + return "saas" + case DocTypeVersioned: + return "versioned" + default: + return "" + } +} + +func getString(fields map[string]interface{}, key string) string { + if v, ok := fields[key]; ok { + if s, ok := v.(string); ok { + return s + } + } + return "" +} + +func buildSnippet(hit *search.DocumentMatch, description string) string { + // Try to get highlighted fragments + var fragments []string + + if hit.Fragments != nil { + for _, fieldFragments := range hit.Fragments { + fragments = append(fragments, fieldFragments...) + } + } + + if len(fragments) > 0 { + // Strip HTML tags from highlight and join + snippet := strings.Join(fragments, " ... ") + snippet = stripHTMLTags(snippet) + if len(snippet) > 300 { + snippet = snippet[:300] + "..." + } + return snippet + } + + // Fall back to description + if description != "" { + if len(description) > 300 { + return description[:300] + "..." 
+		}
+		return description
+	}
+
+	return ""
+}
+
+func stripHTMLTags(s string) string {
+	// Replace highlight markers with markdown bold
+	s = strings.ReplaceAll(s, "<mark>", "**")
+	s = strings.ReplaceAll(s, "</mark>", "**")
+
+	// Decode HTML entities (e.g., &lt; → <, &gt; → >, &quot; → ")
+	s = html.UnescapeString(s)
+
+	return s
+}
diff --git a/mcp_server/pkg/docssearch/search/search_test.go b/mcp_server/pkg/docssearch/search/search_test.go
new file mode 100644
index 000000000..fae8722a2
--- /dev/null
+++ b/mcp_server/pkg/docssearch/search/search_test.go
@@ -0,0 +1,281 @@
+package search
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	blevesearch "github.com/blevesearch/bleve/v2/search"
+	"github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch/indexer"
+	"github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch/loader"
+)
+
+func TestDocTypeToString(t *testing.T) {
+	tests := []struct {
+		input DocType
+		want  string
+	}{
+		{DocTypeSaaS, "saas"},
+		{DocTypeVersioned, "versioned"},
+		{DocTypeUnspecified, ""},
+	}
+
+	for _, tt := range tests {
+		got := docTypeToString(tt.input)
+		if got != tt.want {
+			t.Errorf("docTypeToString(%v) = %q, want %q", tt.input, got, tt.want)
+		}
+	}
+}
+
+func TestGetString(t *testing.T) {
+	fields := map[string]interface{}{
+		"title":   "Test Title",
+		"count":   42,
+		"version": "saas",
+	}
+
+	tests := []struct {
+		key  string
+		want string
+	}{
+		{"title", "Test Title"},
+		{"version", "saas"},
+		{"missing", ""},
+		{"count", ""}, // not a string
+	}
+
+	for _, tt := range tests {
+		got := getString(fields, tt.key)
+		if got != tt.want {
+			t.Errorf("getString(%q) = %q, want %q", tt.key, got, tt.want)
+		}
+	}
+}
+
+func TestStripHTMLTags(t *testing.T) {
+	tests := []struct {
+		input string
+		want  string
+	}{
+		{"<mark>highlighted</mark>", "**highlighted**"},
+		{"no tags here", "no tags here"},
+		{"<mark>one</mark> and <mark>two</mark>", "**one** and **two**"},
+		{"", ""},
+		// HTML entity decoding
+		{"&lt;div&gt;content&lt;/div&gt;", "<div>content</div>"},
+		{"&quot;quoted&quot;", `"quoted"`},
+		{"&amp; ampersand", "& ampersand"},
+		{"<mark>&lt;Component /&gt;</mark>", "**<Component />**"},
+	}
+
+	for _, tt := range tests {
+		got := stripHTMLTags(tt.input)
+		if got != tt.want {
+			t.Errorf("stripHTMLTags(%q) = %q, want %q", tt.input, got, tt.want)
+		}
+	}
+}
+
+func TestBuildSnippet(t *testing.T) {
+	t.Run("with fragments", func(t *testing.T) {
+		hit := &blevesearch.DocumentMatch{
+			Fragments: map[string][]string{
+				"body": {"fragment <mark>one</mark>", "fragment <mark>two</mark>"},
+			},
+		}
+		got := buildSnippet(hit, "fallback description")
+		want := "fragment **one** ... fragment **two**"
+		if got != want {
+			t.Errorf("buildSnippet() = %q, want %q", got, want)
+		}
+	})
+
+	t.Run("fallback to description", func(t *testing.T) {
+		hit := &blevesearch.DocumentMatch{}
+		got := buildSnippet(hit, "fallback description")
+		if got != "fallback description" {
+			t.Errorf("buildSnippet() = %q, want %q", got, "fallback description")
+		}
+	})
+
+	t.Run("truncate long description", func(t *testing.T) {
+		hit := &blevesearch.DocumentMatch{}
+		longDesc := string(make([]byte, 400))
+		for i := range longDesc {
+			longDesc = longDesc[:i] + "x" + longDesc[i+1:]
+		}
+		got := buildSnippet(hit, longDesc)
+		if len(got) != 303 { // 300 + "..."
+			t.Errorf("buildSnippet() len = %d, want 303", len(got))
+		}
+	})
+
+	t.Run("empty", func(t *testing.T) {
+		hit := &blevesearch.DocumentMatch{}
+		got := buildSnippet(hit, "")
+		if got != "" {
+			t.Errorf("buildSnippet() = %q, want empty", got)
+		}
+	})
+}
+
+func TestSearchIntegration(t *testing.T) {
+	// Create temp directory for index
+	tmpDir, err := os.MkdirTemp("", "docssearch-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	indexPath := filepath.Join(tmpDir, "index")
+
+	// Create test documents
+	docs := []loader.Document{
+		{
+			Path:        "docs/getting-started/quickstart.md",
+			Title:       "Quickstart Guide",
+			Body:        "Learn how to set up your first pipeline quickly.",
+			Description: "Get started with Semaphore in minutes",
+			Version:     "saas",
+			DocType:     "saas",
+			Headings:    []string{"Prerequisites", "Step 1", "Step 2"},
+		},
+		{
+			Path:        "docs/reference/pipeline-yaml.md",
+			Title:       "Pipeline YAML Reference",
+			Body:        "Complete reference for pipeline configuration.",
+			Description: "Pipeline YAML syntax and options",
+			Version:     "saas",
+			DocType:     "saas",
+			Headings:    []string{"Syntax", "Examples"},
+		},
+		{
+			Path:        "versioned_docs/version-CE/getting-started/install.md",
+			Title:       "Installation Guide",
+			Body:        "How to install Semaphore CE on your infrastructure.",
+			Description: "Install Semaphore Community Edition",
+			Version:     "CE",
+			DocType:     "versioned",
+			Headings:    []string{"Requirements", "Docker", "Kubernetes"},
+		},
+	}
+
+	// Build index
+	idx := indexer.New(indexPath)
+	if err := idx.BuildIndex(docs); err != nil {
+		t.Fatalf("BuildIndex() error = %v", err)
+	}
+
+	// Open searcher
+	searcher, err := Open(indexPath)
+	if err != nil {
+		t.Fatalf("Open() error = %v", err)
+	}
+	defer searcher.Close()
+
+	t.Run("basic search", func(t *testing.T) {
+		req := &SearchRequest{
+			Query: "pipeline",
+			Limit: 10,
+		}
+		results, err := searcher.Search(req)
+		if err != nil {
+			t.Fatalf("Search() error = %v", err)
+		}
+		if len(results) == 0 {
+			t.Error("Search() returned no results")
+		}
+		// Pipeline YAML Reference should be in results
+		found := false
+		for _, r := range results {
+			if r.Title == "Pipeline YAML Reference" {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Error("Expected 'Pipeline YAML Reference' in results")
+		}
+	})
+
+	t.Run("search with version filter", func(t 
*testing.T) { + req := &SearchRequest{ + Query: "install", + Version: "CE", + Limit: 10, + } + results, err := searcher.Search(req) + if err != nil { + t.Fatalf("Search() error = %v", err) + } + for _, r := range results { + if r.Version != "CE" { + t.Errorf("Result version = %q, want CE", r.Version) + } + } + }) + + t.Run("search with doc type filter", func(t *testing.T) { + req := &SearchRequest{ + Query: "guide", + DocType: DocTypeSaaS, + Limit: 10, + } + results, err := searcher.Search(req) + if err != nil { + t.Fatalf("Search() error = %v", err) + } + for _, r := range results { + if r.DocType != "saas" { + t.Errorf("Result doc_type = %q, want saas", r.DocType) + } + } + }) + + t.Run("search with path prefix filter", func(t *testing.T) { + req := &SearchRequest{ + Query: "guide", + PathPrefix: "docs/getting-started", + Limit: 10, + } + results, err := searcher.Search(req) + if err != nil { + t.Fatalf("Search() error = %v", err) + } + prefix := "docs/getting-started" + for _, r := range results { + if len(r.Path) < len(prefix) || r.Path[:len(prefix)] != prefix { + t.Errorf("Result path = %q, expected prefix %s", r.Path, prefix) + } + } + }) + + t.Run("search with limit", func(t *testing.T) { + req := &SearchRequest{ + Query: "semaphore", + Limit: 1, + } + results, err := searcher.Search(req) + if err != nil { + t.Fatalf("Search() error = %v", err) + } + if len(results) > 1 { + t.Errorf("Search() returned %d results, want <= 1", len(results)) + } + }) + + t.Run("no results", func(t *testing.T) { + req := &SearchRequest{ + Query: "xyznonexistent", + Limit: 10, + } + results, err := searcher.Search(req) + if err != nil { + t.Fatalf("Search() error = %v", err) + } + if len(results) != 0 { + t.Errorf("Search() returned %d results, want 0", len(results)) + } + }) +} diff --git a/mcp_server/pkg/tools/docs/register.go b/mcp_server/pkg/tools/docs/register.go new file mode 100644 index 000000000..00106f949 --- /dev/null +++ b/mcp_server/pkg/tools/docs/register.go @@ -0,0 +1,40 @@ +package docs + +import ( + "os" + + "github.com/mark3labs/mcp-go/server" + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch" + "github.com/semaphoreio/semaphore/mcp_server/pkg/logging" +) + +const ( + defaultIndexPath = "/app/docssearch/index" + defaultDocsRoot = "/app/docssearch/docs" +) + +// Register adds the documentation search tool to the MCP server. +func Register(s *server.MCPServer) { + indexPath := os.Getenv("DOCSSEARCH_INDEX_PATH") + if indexPath == "" { + indexPath = defaultIndexPath + } + + docsRoot := os.Getenv("DOCSSEARCH_DOCS_ROOT") + if docsRoot == "" { + docsRoot = defaultDocsRoot + } + + client, err := docssearch.New(indexPath, docsRoot) + if err != nil { + logging.ForComponent("docs"). + WithError(err). 
+ Warn("Failed to initialize docssearch client, docs_search tool will not be available") + return + } + + s.AddTool(newSearchTool(searchToolName, searchFullDescription()), searchHandler(client)) + + // Register docs as a resource template (not a tool) for reading full content + s.AddResourceTemplate(newDocsResourceTemplate(), docsResourceHandler(client)) +} diff --git a/mcp_server/pkg/tools/docs/resource.go b/mcp_server/pkg/tools/docs/resource.go new file mode 100644 index 000000000..4ecbb7df7 --- /dev/null +++ b/mcp_server/pkg/tools/docs/resource.go @@ -0,0 +1,141 @@ +package docs + +import ( + "context" + "fmt" + "path/filepath" + "regexp" + "strings" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch" +) + +// markdownLinkRegex matches markdown links: [text](url) +// Does not match image links: ![alt](url) +var markdownLinkRegex = regexp.MustCompile(`\[([^\]]*)\]\(([^)]+)\)`) + +const ( + docsResourceURIPrefix = "semaphore-docs://" +) + +// newDocsResourceTemplate creates a resource template for Semaphore documentation. +func newDocsResourceTemplate() mcp.ResourceTemplate { + return mcp.NewResourceTemplate( + docsResourceURIPrefix+"{+path}", + "Semaphore Documentation", + mcp.WithTemplateDescription(`Access Semaphore CI/CD documentation files. + +Use this resource to read the full content of documentation files found via docs_search. + +URI format: semaphore-docs://{path} +Example: semaphore-docs://docs/using-semaphore/pipelines.md + +The path should match paths returned by the docs_search tool.`), + mcp.WithTemplateMIMEType("text/markdown"), + ) +} + +// docsResourceHandler returns a handler for reading documentation resources. +func docsResourceHandler(client *docssearch.Client) func(ctx context.Context, request mcp.ReadResourceRequest) ([]mcp.ResourceContents, error) { + return func(ctx context.Context, request mcp.ReadResourceRequest) ([]mcp.ResourceContents, error) { + uri := request.Params.URI + + // Extract path from URI + if !strings.HasPrefix(uri, docsResourceURIPrefix) { + return nil, fmt.Errorf("invalid URI: must start with %s", docsResourceURIPrefix) + } + + path := strings.TrimPrefix(uri, docsResourceURIPrefix) + + // Strip fragment/anchor if present (e.g., "docs/foo.md#section" -> "docs/foo.md") + if idx := strings.Index(path, "#"); idx != -1 { + path = path[:idx] + } + + if path == "" { + return nil, fmt.Errorf("invalid URI: path cannot be empty") + } + + // Basic path validation - no parent directory traversal + if strings.Contains(path, "..") { + return nil, fmt.Errorf("invalid path: parent directory traversal not allowed") + } + + doc, err := client.GetDocument(ctx, path) + if err != nil { + return nil, fmt.Errorf("document not found: %w", err) + } + + // Rewrite relative links to absolute semaphore-docs:// URIs + body := rewriteRelativeLinks(doc.Body, path) + + return []mcp.ResourceContents{ + mcp.TextResourceContents{ + URI: uri, + MIMEType: "text/markdown", + Text: body, + }, + }, nil + } +} + +// rewriteRelativeLinks converts relative markdown links to absolute semaphore-docs:// URIs. +// This allows agents to follow links by using the resource URI directly. +func rewriteRelativeLinks(content, currentPath string) string { + dir := filepath.Dir(currentPath) + + return markdownLinkRegex.ReplaceAllStringFunc(content, func(match string) string { + // Check if this is an image link (preceded by !) 
+ // We need to check the character before the match in the original content + idx := strings.Index(content, match) + if idx > 0 && content[idx-1] == '!' { + return match // Leave image links as-is + } + + // Extract the link parts + submatch := markdownLinkRegex.FindStringSubmatch(match) + if len(submatch) != 3 { + return match + } + + linkText := submatch[1] + linkPath := submatch[2] + + // Skip absolute URLs, anchors, and mailto links + if strings.HasPrefix(linkPath, "http://") || + strings.HasPrefix(linkPath, "https://") || + strings.HasPrefix(linkPath, "mailto:") || + strings.HasPrefix(linkPath, "#") { + return match + } + + // Handle anchor in the link path + var anchor string + if anchorIdx := strings.Index(linkPath, "#"); anchorIdx != -1 { + anchor = linkPath[anchorIdx:] + linkPath = linkPath[:anchorIdx] + } + + // Resolve relative path + var resolvedPath string + if strings.HasPrefix(linkPath, "/") { + // Absolute path within docs - strip leading slash + resolvedPath = strings.TrimPrefix(linkPath, "/") + } else { + // Relative path - resolve against current directory + resolvedPath = filepath.Join(dir, linkPath) + resolvedPath = filepath.Clean(resolvedPath) + } + + // Add .md extension if missing and path doesn't have an extension + if filepath.Ext(resolvedPath) == "" { + resolvedPath += ".md" + } + + // Build the new URI + newURI := docsResourceURIPrefix + resolvedPath + anchor + + return fmt.Sprintf("[%s](%s)", linkText, newURI) + }) +} diff --git a/mcp_server/pkg/tools/docs/resource_test.go b/mcp_server/pkg/tools/docs/resource_test.go new file mode 100644 index 000000000..ecbfcf390 --- /dev/null +++ b/mcp_server/pkg/tools/docs/resource_test.go @@ -0,0 +1,100 @@ +package docs + +import ( + "strings" + "testing" +) + +func TestRewriteRelativeLinks(t *testing.T) { + tests := []struct { + name string + content string + currentPath string + expected string + }{ + { + name: "absolute URL unchanged", + content: "[link](https://example.com)", + currentPath: "docs/guide.md", + expected: "[link](https://example.com)", + }, + { + name: "mailto unchanged", + content: "[email](mailto:test@example.com)", + currentPath: "docs/guide.md", + expected: "[email](mailto:test@example.com)", + }, + { + name: "anchor-only link unchanged", + content: "[section](#section)", + currentPath: "docs/guide.md", + expected: "[section](#section)", + }, + { + name: "relative link rewritten", + content: "[other](other.md)", + currentPath: "docs/guide.md", + expected: "[other](semaphore-docs://docs/other.md)", + }, + { + name: "relative link with anchor preserved", + content: "[section](other.md#section)", + currentPath: "docs/guide.md", + expected: "[section](semaphore-docs://docs/other.md#section)", + }, + { + name: "absolute path in docs", + content: "[abs](/reference/api.md)", + currentPath: "docs/guide.md", + expected: "[abs](semaphore-docs://reference/api.md)", + }, + { + name: "path without extension gets .md", + content: "[noext](other)", + currentPath: "docs/guide.md", + expected: "[noext](semaphore-docs://docs/other.md)", + }, + { + name: "image link unchanged", + content: "![image](image.png)", + currentPath: "docs/guide.md", + expected: "![image](image.png)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := rewriteRelativeLinks(tt.content, tt.currentPath) + if result != tt.expected { + t.Errorf("rewriteRelativeLinks(%q, %q) = %q, want %q", + tt.content, tt.currentPath, result, tt.expected) + } + }) + } +} + +func TestStripFragmentFromPath(t *testing.T) { + // 
This tests the logic that should be in docsResourceHandler + // to strip fragments before looking up files + tests := []struct { + input string + expected string + }{ + {"docs/guide.md", "docs/guide.md"}, + {"docs/guide.md#section", "docs/guide.md"}, + {"docs/guide.md#section-with-dashes", "docs/guide.md"}, + {"docs/path/to/file.md#anchor", "docs/path/to/file.md"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + path := tt.input + if idx := strings.Index(path, "#"); idx != -1 { + path = path[:idx] + } + if path != tt.expected { + t.Errorf("strip fragment from %q = %q, want %q", tt.input, path, tt.expected) + } + }) + } +} diff --git a/mcp_server/pkg/tools/docs/search_tool.go b/mcp_server/pkg/tools/docs/search_tool.go new file mode 100644 index 000000000..ee399f25a --- /dev/null +++ b/mcp_server/pkg/tools/docs/search_tool.go @@ -0,0 +1,227 @@ +package docs + +import ( + "context" + "fmt" + "strings" + + "github.com/mark3labs/mcp-go/mcp" + "github.com/mark3labs/mcp-go/server" + "github.com/semaphoreio/semaphore/mcp_server/pkg/docssearch" + "github.com/semaphoreio/semaphore/mcp_server/pkg/tools/internal/shared" +) + +const ( + searchToolName = "docs_search" + defaultLimit = 10 + maxLimit = 30 +) + +func searchFullDescription() string { + return `Search Semaphore documentation for information about CI/CD features, configuration, and usage. + +Use this when you need to answer: +- "How do I configure pipelines in Semaphore?" +- "What are the available environment variables?" +- "How do I set up caching?" +- "What is the YAML syntax for promotions?" + +Parameters: +- query (required): Search term to find in the documentation +- limit (optional): Number of results to return (1-30, default 10) +- version (optional): Filter by doc version. Use "saas" for cloud docs (default), or "CE"/"EE" for self-hosted versioned docs (e.g., "CE", "EE", "CE-1.4", "EE-1.4") + +After finding relevant documents, read the full content using the resource URI: +semaphore-docs://{path} + +Example: semaphore-docs://docs/using-semaphore/pipelines.md + +Response includes: +- File path and title for each matching document +- Snippet showing where the search term appears +- Score indicating relevance + +Examples: +1. Search for pipeline configuration: + docs_search(query="pipeline yaml") + +2. Find information about caching: + docs_search(query="cache dependencies") + +3. Look up environment variables: + docs_search(query="environment variables", limit=5) + +4. Search self-hosted CE docs: + docs_search(query="install", version="CE")` +} + +func newSearchTool(name, description string) mcp.Tool { + return mcp.NewTool( + name, + mcp.WithDescription(description), + mcp.WithString("query", + mcp.Required(), + mcp.Description("Search term to find in Semaphore documentation."), + ), + mcp.WithNumber("limit", + mcp.Description("Number of results to return (1-30). 
Defaults to 10."), + mcp.Min(1), + mcp.Max(float64(maxLimit)), + mcp.DefaultNumber(float64(defaultLimit)), + ), + mcp.WithString("version", + mcp.Description("Filter by doc version: 'saas' (default, cloud docs) or CE/EE versions for self-hosted (e.g., 'CE', 'EE', 'CE-1.4', 'EE-1.4')."), + ), + mcp.WithReadOnlyHintAnnotation(true), + mcp.WithIdempotentHintAnnotation(true), + ) +} + +func searchHandler(client *docssearch.Client) server.ToolHandlerFunc { + return func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) { + tracker := shared.TrackToolExecution(ctx, searchToolName, "") + defer tracker.Cleanup() + + // Validate user ID header for future rate-limiting + _, err := shared.ExtractUserID(req.Header.Get("X-Semaphore-User-ID")) + if err != nil { + return mcp.NewToolResultError(fmt.Sprintf(`%v + +Authentication is required to use this tool. + +Troubleshooting: +- Ensure requests pass through the auth proxy +- Verify the header value is a UUID (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx)`, err)), nil + } + + queryRaw, err := req.RequireString("query") + if err != nil { + return mcp.NewToolResultError("Missing required argument: query. Provide a search term to find in the documentation."), nil + } + + query, err := shared.SanitizeDocsSearchQuery(queryRaw, "query") + if err != nil { + return mcp.NewToolResultError(err.Error()), nil + } + + limit := req.GetInt("limit", defaultLimit) + if limit <= 0 { + limit = defaultLimit + } else if limit > maxLimit { + limit = maxLimit + } + + // Build search options + opts := []docssearch.SearchOption{ + docssearch.WithLimit(limit), + } + + // Infer doc_type from version parameter + version := req.GetString("version", "") + if version == "" || version == "saas" { + // Default to saas docs + opts = append(opts, docssearch.WithDocType("saas")) + } else if strings.HasPrefix(version, "CE") || strings.HasPrefix(version, "EE") { + // CE/EE versions are versioned (self-hosted) docs + opts = append(opts, docssearch.WithDocType("versioned")) + opts = append(opts, docssearch.WithVersion(version)) + } else { + // Unknown version format, try as-is with versioned doc_type + opts = append(opts, docssearch.WithDocType("versioned")) + opts = append(opts, docssearch.WithVersion(version)) + } + + results, err := client.Search(ctx, query, opts...) 
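+ // Each hit carries a path, title, snippet, relevance score, version, and doc_type; these are mapped into the structured result below.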
+ if err != nil { + return mcp.NewToolResultError(fmt.Sprintf(`Documentation search failed: %v + +Possible causes: +- Search index not available +- Invalid search query`, err)), nil + } + + structuredResult := formatStructuredResult(results, query) + markdown := formatMarkdown(results, query, limit) + markdown = shared.TruncateResponse(markdown, shared.MaxResponseChars) + + tracker.MarkSuccess() + return &mcp.CallToolResult{ + Content: []mcp.Content{ + mcp.NewTextContent(markdown), + }, + StructuredContent: structuredResult, + }, nil + } +} + +type searchResult struct { + Results []docResult `json:"results"` + Total int `json:"total"` + Query string `json:"query"` +} + +type docResult struct { + Path string `json:"path"` + Title string `json:"title"` + Snippet string `json:"snippet,omitempty"` + Score float64 `json:"score"` + Version string `json:"version"` + DocType string `json:"doc_type"` +} + +func formatStructuredResult(results []docssearch.SearchResult, query string) searchResult { + docs := make([]docResult, 0, len(results)) + for _, r := range results { + docs = append(docs, docResult{ + Path: r.Path, + Title: r.Title, + Snippet: r.Snippet, + Score: r.Score, + Version: r.Version, + DocType: r.DocType, + }) + } + return searchResult{ + Results: docs, + Total: len(results), + Query: query, + } +} + +func formatMarkdown(results []docssearch.SearchResult, query string, limit int) string { + mb := shared.NewMarkdownBuilder() + + header := fmt.Sprintf("Documentation Search Results (%d found)", len(results)) + mb.H1(header) + + if len(results) == 0 { + mb.Paragraph(fmt.Sprintf("No documentation found matching '%s'.", query)) + mb.Paragraph("**Suggestions:**") + mb.ListItem("Try different search terms") + mb.ListItem("Use more general keywords") + mb.ListItem("Check spelling") + return mb.String() + } + + for idx, r := range results { + if idx > 0 { + mb.Line() + } + + title := r.Title + if title == "" { + title = r.Path + } + mb.H2(title) + mb.KeyValue("Path", r.Path) + mb.KeyValue("Version", r.Version) + mb.KeyValue("Type", r.DocType) + + if r.Snippet != "" { + mb.Paragraph("**Match:**") + mb.Paragraph(fmt.Sprintf("> %s", strings.TrimSpace(r.Snippet))) + } + } + + return mb.String() +} diff --git a/mcp_server/pkg/tools/internal/shared/validation.go b/mcp_server/pkg/tools/internal/shared/validation.go index cc2fd23b7..c5e6d90e9 100644 --- a/mcp_server/pkg/tools/internal/shared/validation.go +++ b/mcp_server/pkg/tools/internal/shared/validation.go @@ -5,6 +5,8 @@ import ( "regexp" "strings" "unicode/utf8" + + "github.com/semaphoreio/semaphore/mcp_server/pkg/config" ) var ( @@ -15,6 +17,20 @@ var ( repositoryURLPattern = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9:/._?#=&%+\-@]*$`) ) +// ExtractUserID gets the user ID from the X-Semaphore-User-ID header. +// In dev mode, returns DevUserID without validation. +func ExtractUserID(headerValue string) (string, error) { + if config.IsDevMode() { + return config.DevUserID, nil + } + + userID := strings.ToLower(strings.TrimSpace(headerValue)) + if err := ValidateUUID(userID, "x-semaphore-user-id header"); err != nil { + return "", err + } + return userID, nil +} + // ValidateUUID ensures a string is a valid UUID format. func ValidateUUID(value, fieldName string) error { value = strings.ToLower(strings.TrimSpace(value)) @@ -134,6 +150,32 @@ func SanitizeSearchQuery(raw, fieldName string) (string, error) { return value, nil } +// SanitizeDocsSearchQuery validates and sanitizes search queries for the docs_search documentation tool.
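+// Plain keyword queries such as "cache dependencies" pass through unchanged apart from whitespace trimming.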
+// Queries containing GitHub-style search operators (repo:, path:, and similar) are rejected to prevent query injection. +func SanitizeDocsSearchQuery(raw, fieldName string) (string, error) { + value := strings.TrimSpace(raw) + if value == "" { + return "", fmt.Errorf("%s is required", fieldName) + } + if utf8.RuneCountInString(value) > 256 { + return "", fmt.Errorf("%s must not exceed 256 characters", fieldName) + } + if hasControlRune(value) { + return "", fmt.Errorf("%s contains control characters", fieldName) + } + + // Reject GitHub-style search operators that could alter the query scope + operators := []string{"repo:", "path:", "user:", "org:", "language:", "filename:", "extension:"} + lower := strings.ToLower(value) + for _, op := range operators { + if strings.Contains(lower, op) { + return "", fmt.Errorf("%s must not contain search operators like %s", fieldName, op) + } + } + + return value, nil +} + // SanitizeRepositoryURLFilter restricts repository_url filters to URL-safe characters. +func SanitizeRepositoryURLFilter(raw, fieldName string) (string, error) { + value := strings.TrimSpace(raw) diff --git a/mcp_server/pkg/tools/testresults/testresults.go b/mcp_server/pkg/tools/testresults/testresults.go index 8d6494753..166576c63 100644 --- a/mcp_server/pkg/tools/testresults/testresults.go +++ b/mcp_server/pkg/tools/testresults/testresults.go @@ -12,8 +12,10 @@ import ( "github.com/semaphoreio/semaphore/mcp_server/pkg/authz" artifacthubpb "github.com/semaphoreio/semaphore/mcp_server/pkg/internal_api/artifacthub" + pipelinepb "github.com/semaphoreio/semaphore/mcp_server/pkg/internal_api/plumber.pipeline" projecthubenum "github.com/semaphoreio/semaphore/mcp_server/pkg/internal_api/projecthub" projecthubpb "github.com/semaphoreio/semaphore/mcp_server/pkg/internal_api/projecthub" + jobpb "github.com/semaphoreio/semaphore/mcp_server/pkg/internal_api/server_farm.job" "github.com/semaphoreio/semaphore/mcp_server/pkg/internalapi" "github.com/semaphoreio/semaphore/mcp_server/pkg/logging" "github.com/semaphoreio/semaphore/mcp_server/pkg/tools/internal/clients" @@ -141,6 +143,10 @@ test_results_signed_url(scope="job", job_id="11111111-2222-3333-4444-55555555555 ensureTracker("") return mcp.NewToolResultError(err.Error()), nil } + if job.GetState() != jobpb.Job_FINISHED { + ensureTracker("") + return mcp.NewToolResultError(fmt.Sprintf("job is not finished (current state: %s). Test results are only available after the job completes. Use jobs_describe to check job status.", job.GetState().String())), nil + } orgID = strings.TrimSpace(job.GetOrganizationId()) projectID = strings.TrimSpace(job.GetProjectId()) if err := shared.ValidateUUID(orgID, "job organization_id"); err != nil { @@ -177,6 +183,10 @@ test_results_signed_url(scope="pipeline", pipeline_id="...")`), nil return mcp.NewToolResultError(err.Error()), nil } pipeline := pipelineResp.GetPipeline() + if pipeline.GetState() != pipelinepb.Pipeline_DONE { + ensureTracker("") + return mcp.NewToolResultError(fmt.Sprintf("pipeline is not done (current state: %s). Test results are only available after the pipeline completes.
Use pipelines_describe to check pipeline status.", pipeline.GetState().String())), nil + } orgID = strings.TrimSpace(pipeline.GetOrganizationId()) projectID = strings.TrimSpace(pipeline.GetProjectId()) pipelineWorkflowID := strings.TrimSpace(pipeline.GetWfId()) @@ -335,7 +345,7 @@ func resolveResultArtifact(ctx context.Context, api internalapi.Provider, storeI } } - return resultArtifact{}, fmt.Errorf("no test result artifacts found in `%s`", strings.TrimSuffix(listingDir, "/")) + return resultArtifact{}, fmt.Errorf("no test result artifacts found in `%s`. Test reports may not be configured for this project. Use the docs_search tool with query 'test reports setup' to learn how to configure test reports", strings.TrimSuffix(listingDir, "/")) } func listPath(ctx context.Context, api internalapi.Provider, artifactID, directory string) ([]*artifacthubpb.ListItem, error) { diff --git a/mcp_server/pkg/tools/testresults/testresults_test.go b/mcp_server/pkg/tools/testresults/testresults_test.go index f8c2b7317..5ae71d1a0 100644 --- a/mcp_server/pkg/tools/testresults/testresults_test.go +++ b/mcp_server/pkg/tools/testresults/testresults_test.go @@ -482,9 +482,14 @@ type pipelineStub struct { orgID string projectID string workflowID string + state pipelinepb.Pipeline_State } func (p *pipelineStub) Describe(ctx context.Context, req *pipelinepb.DescribeRequest, opts ...grpc.CallOption) (*pipelinepb.DescribeResponse, error) { + state := p.state + if state == 0 { + state = pipelinepb.Pipeline_DONE // default to DONE for test convenience + } return &pipelinepb.DescribeResponse{ ResponseStatus: &pipelinepb.ResponseStatus{Code: pipelinepb.ResponseStatus_OK}, Pipeline: &pipelinepb.Pipeline{ @@ -492,6 +497,7 @@ func (p *pipelineStub) Describe(ctx context.Context, req *pipelinepb.DescribeReq WfId: p.workflowID, ProjectId: p.projectID, OrganizationId: p.orgID, + State: state, }, }, nil } @@ -500,15 +506,21 @@ type jobStub struct { jobpb.JobServiceClient orgID string projectID string + state jobpb.Job_State } func (j *jobStub) Describe(ctx context.Context, req *jobpb.DescribeRequest, opts ...grpc.CallOption) (*jobpb.DescribeResponse, error) { + state := j.state + if state == 0 { + state = jobpb.Job_FINISHED // default to FINISHED for test convenience + } return &jobpb.DescribeResponse{ Status: &responsepb.ResponseStatus{Code: responsepb.ResponseStatus_OK}, Job: &jobpb.Job{ Id: req.GetJobId(), OrganizationId: j.orgID, ProjectId: j.projectID, + State: state, }, }, nil }