diff --git a/.custom-gcl.yml b/.custom-gcl.yml deleted file mode 100644 index 6ef11b3..0000000 --- a/.custom-gcl.yml +++ /dev/null @@ -1,11 +0,0 @@ -# This file configures golangci-lint with module plugins. -# When you run 'make lint', it will automatically build a custom golangci-lint binary -# with all the plugins listed below. -# -# See: https://golangci-lint.run/plugins/module-plugins/ -version: v2.8.0 -plugins: - # logcheck validates structured logging calls and parameters (e.g., balanced key-value pairs) - - module: "sigs.k8s.io/logtools" - import: "sigs.k8s.io/logtools/logcheck/gclplugin" - version: latest diff --git a/.github/.release-please-manifest.json b/.github/.release-please-manifest.json index 8fd55a4..69d89c8 100644 --- a/.github/.release-please-manifest.json +++ b/.github/.release-please-manifest.json @@ -1,4 +1,4 @@ { - ".": "0.1.2" + ".": "0.1.0" } \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 00bd102..0000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -name: "Bug report" -about: "Report a reproducible issue in an operator, transport, SDK, Engram, or Impulse" -labels: ["kind/bug", "status/triage"] ---- - -## Component(s) -- [ ] bobrapet (Story/StoryRun controllers) -- [ ] bobravoz-grpc (transport operator) -- [ ] bubu-sdk-go -- [ ] Engram (name it below) -- [ ] Impulse (name it below) -- [ ] Docs / website - -If Engram/Impulse: -``` -name: -version/tag: -execution mode (job / deployment / impulse): -``` - -## What happened? -Tell us what broke. Include the Story/StoryRun status, the expected behaviour, and what you observed instead. - -## Minimal reproduction -1. Inputs/Story snippet (YAML or JSON) -2. Commands you ran (`kubectl`, `make`, etc.) -3. Cluster details (Kubernetes version, Kind/Minikube/managed cluster) - -``` -apiVersion: stories.bubustack.io/v1alpha1 -kind: Story -metadata: - name: example -spec: - ... -``` - -## Logs & traces -- `kubectl logs` for controllers or Engrams (set `BUBU_DEBUG=true` if possible) -- Relevant excerpts from `storyrun` / `steprun` status -- TransportBinding / bobravoz logs if streaming is impacted - -## Additional context -Anything else we should know? For example, custom overrides, secrets/providers, or recent upgrades. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 49d4175..0000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,8 +0,0 @@ -blank_issues_enabled: false -contact_links: - - name: Security reports - url: https://github.com/bubustack/bubu-sdk-go/security/advisories/new - about: Please use our private reporting channel for vulnerabilities. - - name: Architecture & roadmap discussions - url: https://github.com/orgs/bubustack/discussions - about: For proposals that span multiple repositories, start a discussion before opening issues. diff --git a/.github/ISSUE_TEMPLATE/docs_request.md b/.github/ISSUE_TEMPLATE/docs_request.md deleted file mode 100644 index fc0e3e2..0000000 --- a/.github/ISSUE_TEMPLATE/docs_request.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: "Docs / community update" -about: "Request a documentation, support, or example update" -labels: ["kind/docs", "status/triage"] ---- - -## Area -- [ ] Operator docs / architecture -- [ ] Engram or Impulse README (name it) -- [ ] SDK reference (Go examples) -- [ ] Website / tutorials / blog -- [ ] Community health file (Code of Conduct, Security, Support, etc.) - -## What needs to change? -Link to the existing page/file and tell us what is missing or incorrect. - -## Source of truth -Add the CRD, code snippet, or log output that proves the correct behaviour so we can update the doc with confidence. - -## Suggested fix (optional) -Share wording, diagrams, or commands that would resolve the issue. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 7b0d506..0000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -name: "Feature request" -about: "Pitch a new capability for the operator, transport, SDK, Engram, or docs" -labels: ["kind/feature", "status/triage"] ---- - -## Problem statement -What workflow or operational gap are you trying to solve? Include scale, latency, tenancy, or compliance constraints if relevant. - -## Proposed change -Describe the behaviour you’d like to see. If this affects CRDs, Engram templates, or SDK APIs, list the new fields and defaults. - -``` -apiVersion: catalog.bubustack.io/v1alpha1 -kind: EngramTemplate -spec: - with: - newField: ... -``` - -## Affected component(s) -- [ ] bobrapet -- [ ] bobravoz-grpc -- [ ] bubu-sdk-go -- [ ] Engram (name it) -- [ ] Impulse (name it) -- [ ] Docs / website - -## Alternatives considered -What did you try already? Examples: custom Engram, CEL policy, external controller, different transport, etc. - -## Additional context -Links, design docs, screenshots, or related issues/discussions. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 8556ab4..0000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,33 +0,0 @@ -## Summary - - -## Type of change -- [ ] Bug fix -- [ ] New feature -- [ ] Breaking change -- [ ] Documentation / examples -- [ ] Refactor / chore - -## Related issues - - -## How was this tested? -- [ ] Unit tests -- [ ] Integration tests -- [ ] E2E / Kind tests -- [ ] Manual verification -Commands / notes: -```bash -# paste commands you ran (or explain why not) -``` - -## Docs / CRDs impact -- [ ] Docs updated (README/Guides/Support) -- [ ] CRD or Engram/Impulse schema changes -- [ ] Not applicable - -## Checklist -- [ ] Lint/tests ran locally -- [ ] Updated Engram.yaml/CRDs/manifests when behaviour changed -- [ ] Added changelog/release note if required -- [ ] No secrets or sensitive data committed diff --git a/.github/labels.yml b/.github/labels.yml deleted file mode 100644 index c641696..0000000 --- a/.github/labels.yml +++ /dev/null @@ -1,75 +0,0 @@ -- name: "kind/bug" - color: "d73a4a" - description: "Unexpected behaviour or regression that needs fixing." -- name: "kind/feature" - color: "0e8a16" - description: "New functionality or enhancement request." -- name: "kind/docs" - color: "c5def5" - description: "Documentation, examples, or community health updates." -- name: "kind/refactor" - color: "5319e7" - description: "Code health, cleanup, or non-functional improvements." -- name: "kind/tests" - color: "fbca04" - description: "Testing, CI, or verification-only changes." -- name: "kind/chore" - color: "bfd4f2" - description: "Maintenance, dependency bumps, or release automation." -- name: "dependencies" - color: "0366d6" - description: "Dependency updates raised by automation such as Dependabot." -- name: "area/operator" - color: "0b4f6c" - description: "Bobrapet controller or CRD-level change." -- name: "area/transport" - color: "0b4f6c" - description: "Bobravoz gRPC transport changes." -- name: "area/sdk" - color: "0b4f6c" - description: "Bubu SDK or shared runtime work." -- name: "area/engram" - color: "0b4f6c" - description: "Specific Engram implementation or template change." -- name: "area/impulse" - color: "0b4f6c" - description: "Impulse templates, webhook ingestion, or trigger paths." -- name: "area/docs" - color: "0b4f6c" - description: "Docs site, READMEs, or knowledge base updates." -- name: "priority/critical" - color: "b60205" - description: "Production-impacting issue that needs immediate attention." -- name: "priority/high" - color: "d93f0b" - description: "Important issue to schedule soon." -- name: "priority/medium" - color: "fbca04" - description: "Normal priority item." -- name: "priority/low" - color: "cfd3d7" - description: "Nice-to-have or backlog item." -- name: "status/triage" - color: "ededed" - description: "Issue has not been reviewed yet." -- name: "status/needs-info" - color: "f9d0c4" - description: "Waiting on more information from the reporter." -- name: "status/in-progress" - color: "004d99" - description: "Actively being worked on." -- name: "status/blocked" - color: "5319e7" - description: "Blocked on another issue, dependency, or external signal." -- name: "status/ready" - color: "28a745" - description: "Ready to merge/release once tests pass." -- name: "good first issue" - color: "7057ff" - description: "Small, well-scoped tasks for new contributors." -- name: "help wanted" - color: "008672" - description: "Looking for community contributions." -- name: "triage/needs-owner" - color: "5319e7" - description: "Needs someone to own or shepherd the fix." diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 31c571f..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: CI - -on: - workflow_dispatch: - push: - branches: - - main - pull_request: - branches: - - main - -permissions: - contents: read - -jobs: - lint: - uses: ./.github/workflows/lint.yml - - test: - uses: ./.github/workflows/test.yml diff --git a/.github/workflows/label-sync.yml b/.github/workflows/label-sync.yml deleted file mode 100644 index 66741ac..0000000 --- a/.github/workflows/label-sync.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Label Sync - -on: - workflow_dispatch: - push: - paths: - - '.github/labels.yml' - - '.github/workflows/label-sync.yml' - -permissions: - issues: write - contents: read - -jobs: - sync: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Sync labels - uses: micnncim/action-label-syncer@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - manifest: .github/labels.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 9306d45..1361d7d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,29 +1,28 @@ name: Lint on: - workflow_call: - workflow_dispatch: + push: + pull_request: permissions: contents: read jobs: lint: + name: Run on Ubuntu runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v6 + - name: Clone the code + uses: actions/checkout@v5 - name: Setup Go uses: actions/setup-go@v6 with: - go-version: '1.26.x' + go-version-file: go.mod check-latest: true cache: true - cache-dependency-path: go.sum - - - name: Check linter configuration - run: make lint-config - name: Run linter - run: make lint + uses: golangci/golangci-lint-action@v9 + with: + version: v2.4.0 diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 70be8c8..b3dc3ae 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -8,45 +8,14 @@ on: permissions: contents: write pull-requests: write + packages: write concurrency: group: release-please-${{ github.ref }} cancel-in-progress: true jobs: - verify: - name: Verify release commit - runs-on: ubuntu-latest - permissions: - contents: read - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Go - uses: actions/setup-go@v6 - with: - go-version: '1.26.x' - check-latest: true - cache: true - cache-dependency-path: go.sum - - - name: Run linters - run: make lint - - - name: Run tests - run: go test ./... - - - name: Run race detector - run: go test -race ./... - - - name: Verify module state - run: | - go mod verify - go mod tidy -diff - release-please: - needs: verify runs-on: ubuntu-latest outputs: release_created: ${{ steps.release.outputs.release_created }} @@ -56,42 +25,78 @@ jobs: id: release uses: googleapis/release-please-action@v4 with: + # If you want CI to run on the Release PRs, set a PAT here: + # token: ${{ secrets.RELEASE_PLEASE_PAT }} config-file: .github/release-please-config.json manifest-file: .github/.release-please-manifest.json - - name: Checkout + # Only runs when a new GitHub Release/tag was created by release-please + - name: Checkout code if: ${{ steps.release.outputs.release_created }} - uses: actions/checkout@v6 + uses: actions/checkout@v5 - - name: Set up Go + - name: Set up Go (from go.mod) if: ${{ steps.release.outputs.release_created }} uses: actions/setup-go@v6 with: - go-version: '1.26.x' + go-version-file: go.mod check-latest: true cache: true - cache-dependency-path: go.sum - - name: Warm Go module caches + - name: Run tests + if: ${{ steps.release.outputs.release_created }} + run: go test -v -race -coverprofile=coverage.out ./... + + - name: Upload coverage to Codecov + if: ${{ steps.release.outputs.release_created }} + uses: codecov/codecov-action@v5 + with: + file: ./coverage.out + # For private repos or protected branches you likely need a token: + # token: ${{ secrets.CODECOV_TOKEN }} + fail_ci_if_error: false + + - name: Verify module tidy state + if: ${{ steps.release.outputs.release_created }} + run: | + go mod verify + go mod tidy + git diff --exit-code go.mod go.sum + + - name: Warm Go module caches (proxy/sum/pkg.go.dev) if: ${{ steps.release.outputs.release_created }} run: | set -euo pipefail MOD=github.com/${{ github.repository }} VER=${{ steps.release.outputs.tag_name }} + echo "Warming proxy.golang.org for $MOD@$VER" curl -sSfL "https://proxy.golang.org/${MOD}/@v/${VER}.info" || true + echo "Warming sum.golang.org for $MOD@$VER" curl -sSfL "https://sum.golang.org/lookup/${MOD}@${VER}" || true + echo "Triggering pkg.go.dev indexing for $MOD@$VER" curl -sSfL "https://pkg.go.dev/${MOD}@${VER}" > /dev/null || true + - name: Trigger pkg.go.dev indexing + if: ${{ steps.release.outputs.release_created }} + run: | + VERSION=${{ steps.release.outputs.tag_name }} + echo "Triggering pkg.go.dev to index version $VERSION" + curl -sSf "https://proxy.golang.org/github.com/bubustack/bubu-sdk-go/@v/${VERSION}.info" || true + # Optional: also poke sum.golang.org: + # curl -sSf "https://sum.golang.org/lookup/github.com/bubustack/bubu-sdk-go@${VERSION}" || true + - name: Create release summary if: ${{ steps.release.outputs.release_created }} run: | { - echo "## Release ${{ steps.release.outputs.tag_name }}" + echo "## Release ${{ steps.release.outputs.tag_name }} 🚀" echo - echo "- GitHub release: https://github.com/${{ github.repository }}/releases/tag/${{ steps.release.outputs.tag_name }}" - echo "- pkg.go.dev: https://pkg.go.dev/github.com/${{ github.repository }}@${{ steps.release.outputs.tag_name }}" + echo "The new version has been released and is available at:" + echo "- **GitHub**: https://github.com/${{ github.repository }}/releases/tag/${{ steps.release.outputs.tag_name }}" + echo "- **pkg.go.dev**: https://pkg.go.dev/github.com/bubustack/bubu-sdk-go@${{ steps.release.outputs.tag_name }}" echo + echo "### Installation" echo '```bash' - echo "go get github.com/${{ github.repository }}@${{ steps.release.outputs.tag_name }}" + echo "go get github.com/bubustack/bubu-sdk-go@${{ steps.release.outputs.tag_name }}" echo '```' } >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d0e6147..48166c5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,140 +1,28 @@ name: Tests on: - workflow_call: - workflow_dispatch: + push: + pull_request: permissions: contents: read jobs: - go-test: - name: Go 1.26.x on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, ubuntu-24.04-arm64, macos-latest] - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Go - uses: actions/setup-go@v6 - with: - go-version: '1.26.x' - check-latest: true - cache: true - cache-dependency-path: go.sum - - - name: Run tests - run: make test - - race-and-coverage: - name: Race and coverage - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Go - uses: actions/setup-go@v6 - with: - go-version: '1.26.x' - check-latest: true - cache: true - cache-dependency-path: go.sum - - - name: Run race detector with coverage - run: go test -race -coverprofile=coverage.out ./... - - - name: Upload coverage artifact - uses: actions/upload-artifact@v7 - with: - name: coverage.out - path: coverage.out - - govulncheck: - name: Govulncheck - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Go - uses: actions/setup-go@v6 - with: - go-version: '1.26.x' - check-latest: true - cache: true - cache-dependency-path: go.sum - - - name: Install govulncheck - run: go install golang.org/x/vuln/cmd/govulncheck@latest - - - name: Run govulncheck - run: | - "$(go env GOPATH)/bin/govulncheck" ./... - - api-compat: - name: API compatibility + test: + name: Run on Ubuntu runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 + - name: Clone the code + uses: actions/checkout@v5 - name: Setup Go uses: actions/setup-go@v6 with: - go-version: '1.26.x' + go-version-file: go.mod check-latest: true cache: true - cache-dependency-path: go.sum - - - name: Install gorelease - run: go install golang.org/x/exp/cmd/gorelease@latest - - name: Run API compatibility check + - name: Running Tests run: | - git fetch --tags --force - if git tag --list 'v*' | grep -q .; then - "$(go env GOPATH)/bin/gorelease" - else - echo "No release tags found; skipping API compatibility bootstrap check." - fi - - envtest: - name: Envtest ${{ matrix.k8s-version }} on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - needs: go-test - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, ubuntu-24.04-arm64] - k8s-version: ['1.26.x', '1.27.x', '1.28.x', '1.29.x', '1.30.x', '1.31.x'] - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: Setup Go - uses: actions/setup-go@v6 - with: - go-version: '1.26.x' - check-latest: true - cache: true - cache-dependency-path: go.sum - - - name: Setup envtest assets - run: | - go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest - ARCH=amd64 - if [[ "${{ matrix.os }}" == *"arm64"* ]]; then - ARCH=arm64 - fi - KUBEBUILDER_ASSETS=$($(go env GOPATH)/bin/setup-envtest use -p path --arch "${ARCH}" --os linux ${{ matrix.k8s-version }}) - echo "KUBEBUILDER_ASSETS=$KUBEBUILDER_ASSETS" >> "$GITHUB_ENV" - - - name: Run envtest smoke tests - run: go test -tags=integration ./integration/... + go mod tidy + make test diff --git a/.gitignore b/.gitignore index f2fb312..0940b5b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,30 @@ +# Created by https://www.toptal.com/developers/gitignore/api/go +# Edit at https://www.toptal.com/developers/gitignore?templates=go + +### Go ### +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# # Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib -bin/ +bin/* -# Test binaries and coverage output +# Test binary, built with `go test -c` *.test + +# Output of the go coverage tool, specifically when used with LiteIDE *.out +# Dependency directories (remove the comment below to include it) +# vendor/ + # Go workspace file go.work -# Editor and IDE files -.idea/ -.vscode/ -*.swp -*.swo -*~ +# End of https://www.toptal.com/developers/gitignore/api/go -# macOS / local cache files -.DS_Store -.gocache/ +.DS_Store \ No newline at end of file diff --git a/.golangci.yml b/.golangci.yml index 4073910..6ef9fd0 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -21,18 +21,9 @@ linters: - unconvert - unparam - unused - - modernize - - logcheck settings: gocyclo: min-complexity: 15 - modernize: - disable: - - omitzero - custom: - logcheck: - type: "module" - description: Checks Go logging calls for Kubernetes logging conventions. revive: rules: - name: comment-spacings diff --git a/CHANGELOG.md b/CHANGELOG.md index 05027ee..02481d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,25 @@ # Changelog +## 0.1.0 (2025-10-18) + + +### Features + +* initial public release of bubu-sdk-go ([19aabcc](https://github.com/bubustack/bubu-sdk-go/commit/19aabccf67f2506348a7379103d641a74463f04c)) + + +### Build System + +* **deps:** Bump codecov/codecov-action from 4 to 5 ([#3](https://github.com/bubustack/bubu-sdk-go/issues/3)) ([1f77a78](https://github.com/bubustack/bubu-sdk-go/commit/1f77a78b879b61630e016ba30f6f90a83d235ef8)) +* **deps:** Bump github.com/aws/aws-sdk-go-v2 from 1.39.2 to 1.39.3 ([#7](https://github.com/bubustack/bubu-sdk-go/issues/7)) ([f6f652c](https://github.com/bubustack/bubu-sdk-go/commit/f6f652cb1059db274027859ef1455052a3a8edf2)) +* **deps:** Bump github.com/aws/aws-sdk-go-v2/config ([#6](https://github.com/bubustack/bubu-sdk-go/issues/6)) ([8c7a250](https://github.com/bubustack/bubu-sdk-go/commit/8c7a25032f7166435cea9f7ae0aeea737f18317b)) +* **deps:** Bump github.com/aws/aws-sdk-go-v2/feature/s3/manager ([#5](https://github.com/bubustack/bubu-sdk-go/issues/5)) ([c2e0838](https://github.com/bubustack/bubu-sdk-go/commit/c2e083839d17530563ac8d0582e780715f55b06e)) + +## Changelog + All notable changes to this project will be documented in this file. This file is automatically generated by [Release Please](https://github.com/googleapis/release-please) based on [Conventional Commits](https://www.conventionalcommits.org/). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## Unreleased - -### Bug Fixes - -* enforce `BUBU_GRPC_REQUIRE_TLS` on streaming server startup, preventing plaintext fallback when the toggle or TLS env vars are set. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 9a5b891..b5b32b5 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -49,7 +49,7 @@ Tensions can occur between community members even when they are trying their bes When an incident does occur, it is important to report it promptly. To report a possible violation, please contact the Community Moderators via one of the following channels: - Email: community@bubustack.com -- GitHub Discussions: https://github.com/orgs/bubustack/discussions (select the Community Moderation category) +- GitHub Discussions: https://github.com/bubustack/bobravoz-grpc/discussions (select the Community Moderation category) If you are uncomfortable reporting publicly, email is preferred. We aim to acknowledge reports within 72 hours and will keep reporters updated as appropriate. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4c43894..636a72a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,60 +1,64 @@ -# Contributing to bubu-sdk-go +# Contributing to bobravoz-grpc (transport operator) -The Go SDK powers every Engram and Impulse in BubuStack. Thanks for helping us keep it reliable, ergonomic, and production-ready. +First off, thank you for considering contributing. Your help is appreciated. -## Reporting bugs +This document provides guidelines for contributing to the sdk and its docs. Please read it carefully to ensure a smooth collaboration process. -- Check [existing issues](https://github.com/bubustack/bubu-sdk-go/issues?q=is%3Aissue) first. -- Include the following details when filing a bug: - - Engram/Impulse name and execution mode (batch, streaming, impulse) using the SDK. - - Minimal code sample or `Story` snippet that reproduces the issue. - - SDK version (tag/commit), Kubernetes version, and controller versions you tested with. - - Logs with `BUBU_DEBUG=true`, including stack traces or transport payloads if relevant. -- Apply the `kind/bug`, appropriate `area/*`, and `priority/*` labels when triaging. +## How Can I Contribute? -## Requesting enhancements +### Reporting Bugs -- Use the [feature request template](https://github.com/bubustack/bubu-sdk-go/issues/new?template=feature_request.md). -- Describe the workflow or scale constraint that motivates the change, and outline the desired API (interfaces, config structs, helper functions). -- If the change spans other repos (operators, Engram templates), open an org-level discussion so we can coordinate releases. +- **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/bubustack/bubu-sdk-go/issues). +- If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/bubustack/bubu-sdk-go/issues/new). Be sure to include a **title and clear description**, as much relevant information as possible, and a **code sample** or an **executable test case** demonstrating the expected behavior that is not occurring. -## Pull requests +### Suggesting Enhancements -1. Fork the repo, branch from `main`, and keep the PR focused. -2. Follow existing package boundaries (`batch`, `stream`, `impulse`, `runtime`, etc.); avoid introducing new dependencies without discussion. -3. Run the quality gates before requesting review: - ```bash - make lint # golangci-lint (downloaded to ./bin) - make test # fmt + vet + go test ./... -race - make test-integration # optional envtest smoke tests (requires KUBEBUILDER_ASSETS) - make test-coverage # optional coverage profile - make tidy # keep go.mod/go.sum clean - ``` -4. Update docs/examples/README when you add new APIs or change behaviour. Mention whether bobrapet/operator updates are required. -5. Use the PR template to record commands executed, test evidence, and linked issues (e.g., `Fixes #123`). +- Open a new issue to discuss your enhancement. Clearly describe the proposed enhancement and its benefits. +- Provide code snippets, mockups, or diagrams to illustrate your idea. -## Development workflow +### Pull Requests + +- Fork the repository and create your branch from `main`. +- Ensure the test suite passes: `make test` (envtest will be installed automatically). +- Lint: `make lint` (golangci-lint via ./bin). +- Submit the pull request. + +## Development Workflow ### Prerequisites -- Go 1.26+ (matching `go.mod`). -- `make`, bash, and optionally Docker (only required for building sample binaries/images). +- Go 1.24+ +- Docker +- `make` ### Setup -1. Fork the repository and clone your fork. -2. `cd bubu-sdk-go` -3. `make help` to explore targets grouped by category. -4. `make lint-config` verifies the golangci-lint configuration if needed. +1. Fork the repository. +2. Clone your fork: `git clone https://github.com/your_username/bubu-sdk-go.git` +3. Navigate to the repository directory: `cd bubu-sdk-go` +4. Build: `make build` -### Running tests +### Running Tests ```bash -make test # fmt + vet + go test ./... -race -make test-integration # optional envtest-backed suite +# Run all tests +make test ``` -### Commit style & Code of Conduct +### Commit Message Conventions + +We follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification. This allows for automated changelog generation and semantic versioning. + +Examples: +- `feat: Add support for custom retry policies` +- `fix: Correctly handle nil inputs in Process` +- `docs: Update README with new quickstart` +- `chore: Upgrade to Go 1.24` + +### Code of Conduct + +Participation in this project is governed by the +[Contributor Covenant Code of Conduct](./CODE_OF_CONDUCT.md). By participating, +you are expected to uphold this code. Please report unacceptable behavior to +conduct@bubustack.com. -- Follow [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) so changelog automation works (e.g., `feat: add structured message helper`, `fix: guard nil transport envelope`). -- Participation in this project is governed by the [Contributor Covenant Code of Conduct](./CODE_OF_CONDUCT.md). Report unacceptable behaviour to [conduct@bubustack.com](mailto:conduct@bubustack.com) or via the org Discussions moderation channel. diff --git a/LICENSE b/LICENSE index 12b1432..d645695 100644 --- a/LICENSE +++ b/LICENSE @@ -187,7 +187,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2025 BubuStack. + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/Makefile b/Makefile index 7e5b5fe..fcd6d8b 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ all: build .PHONY: help help: ## Display this help. - @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-18s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) ##@ Development @@ -41,22 +41,14 @@ vet: ## Run go vet against code. go vet ./... .PHONY: test -test: fmt vet ## Run tests with race detector. - go test -race ./... - -.PHONY: test-integration -test-integration: ## Run envtest smoke tests (requires KUBEBUILDER_ASSETS) - go test -tags=integration ./integration/... +test: fmt vet ## Run tests. + go test ./... .PHONY: test-coverage test-coverage: ## Run tests with coverage profile. go test -coverprofile=coverage.out ./... @echo "Coverage profile written to coverage.out" -.PHONY: vulncheck -vulncheck: govulncheck ## Run govulncheck against the module. - $(GOVULNCHECK) ./... - .PHONY: lint lint: golangci-lint ## Run golangci-lint linter $(GOLANGCI_LINT) run @@ -91,30 +83,18 @@ clean: ## Clean build and coverage artifacts ## Location to install dependencies to LOCALBIN ?= $(shell pwd)/bin $(LOCALBIN): - mkdir -p "$(LOCALBIN)" - + mkdir -p $(LOCALBIN) + ## Tool Binaries GOLANGCI_LINT = $(LOCALBIN)/golangci-lint -GOVULNCHECK = $(LOCALBIN)/govulncheck ## Tool Versions -GOLANGCI_LINT_VERSION ?= v2.11.4 -GOVULNCHECK_VERSION ?= v1.1.4 +GOLANGCI_LINT_VERSION ?= v2.4.0 .PHONY: golangci-lint golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary. $(GOLANGCI_LINT): $(LOCALBIN) $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION)) - @test -f .custom-gcl.yml && { \ - echo "Building custom golangci-lint with plugins..." && \ - $(GOLANGCI_LINT) custom --destination $(LOCALBIN) --name golangci-lint-custom && \ - mv -f $(LOCALBIN)/golangci-lint-custom $(GOLANGCI_LINT); \ - } || true - -.PHONY: govulncheck -govulncheck: $(GOVULNCHECK) ## Download govulncheck locally if necessary. -$(GOVULNCHECK): $(LOCALBIN) - $(call go-install-tool,$(GOVULNCHECK),golang.org/x/vuln/cmd/govulncheck,$(GOVULNCHECK_VERSION)) # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary @@ -125,9 +105,9 @@ define go-install-tool set -e; \ package=$(2)@$(3) ;\ echo "Downloading $${package}" ;\ -rm -f "$(1)" ;\ -GOBIN="$(LOCALBIN)" go install $${package} ;\ -mv "$(LOCALBIN)/$$(basename "$(1)")" "$(1)-$(3)" ;\ +rm -f $(1) ;\ +GOBIN=$(LOCALBIN) go install $${package} ;\ +mv $(1) $(1)-$(3) ;\ } ;\ -ln -sf "$$(realpath "$(1)-$(3)")" "$(1)" -endef +ln -sf $$(realpath $(1)-$(3)) $(1) +endef \ No newline at end of file diff --git a/README.md b/README.md index 02ecac5..88dbecb 100644 --- a/README.md +++ b/README.md @@ -1,58 +1,51 @@ -# 🧰 bubu-sdk-go — Official Go SDK for BubuStack +# 🧰 bubu-sdk-go — Official Go SDK for bobrapet [![Go Reference](https://pkg.go.dev/badge/github.com/bubustack/bubu-sdk-go.svg)](https://pkg.go.dev/github.com/bubustack/bubu-sdk-go) [![Go Report Card](https://goreportcard.com/badge/github.com/bubustack/bubu-sdk-go)](https://goreportcard.com/report/github.com/bubustack/bubu-sdk-go) -`bubu-sdk-go` is the public Go SDK for building BubuStack components: +The official Go SDK for building type-safe, production-grade components for **bobrapet**, the Kubernetes‑native AI and data workflow orchestration engine. -- **Engrams** for batch and streaming data processing -- **Impulses** for long-running external event listeners +Quick links: +- SDK docs: https://bubustack.io/docs/sdk +- API Reference: https://pkg.go.dev/github.com/bubustack/bubu-sdk-go -This is the component boundary. Engrams and Impulses depend on this SDK, not on -`bobrapet` controller internals. +## 🌟 Key Features -## Prerequisites +Use this SDK to build **engrams** (data processing tasks) and **impulses** (event listeners that trigger workflows). bobrapet orchestrates their execution as Kubernetes Jobs and Deployments, handling: -- Go 1.26.2 or newer (matching `go.mod`) -- Docker or another OCI-compatible image builder -- Access to a Kubernetes cluster supported by the current `bobrapet` release set +- **Type-safe configuration and inputs** — Define your interfaces as Go structs, get compile-time safety. +- **Automatic large payload handling** — Outputs exceeding size limits are transparently offloaded to S3/file storage. +- **Streaming pipelines** — Build real-time data processing chains with gRPC bidirectional streaming. +- **Retries and observability** — Exit codes inform retry policies; OpenTelemetry metrics/tracing hooks included (initialize an exporter in your app/infra). -## Quick Links +## 🏗️ Architecture -- Go SDK docs: https://bubustack.io/docs/sdk/go-sdk -- Authoring guide: https://bubustack.io/docs/sdk/building-engrams -- Durable semantics: https://bubustack.io/docs/overview/durable-semantics -- Streaming contract: https://bubustack.io/docs/streaming/streaming-contract -- API reference: https://pkg.go.dev/github.com/bubustack/bubu-sdk-go +High-level SDK architecture, execution modes, and operator integration are documented here: +- Overview: https://bubustack.io/docs/sdk -## What the SDK Handles +### 🧭 When to use which mode -- Type-safe config, input, and secret binding -- StepRun status patching and structured errors -- Storage-ref hydration and large-payload offloading -- Streaming transport lifecycle, control directives, and replay-safe acknowledgements -- Trigger submission from Impulses via durable `StoryTrigger` requests -- Cross-process effect deduplication via `EffectClaim` -- Test harnesses and conformance suites for component authors +- Batch (Jobs): finite tasks with clear start/end; use `StartBatch`. Evidence: batch flow in `batch.go`. +- Streaming (Deployments with gRPC): continuous processing with backpressure/heartbeats; use `StartStreaming`. Evidence: `stream.go`. +- Impulse (Deployments): long‑running trigger that creates `StoryRun`s; use `RunImpulse`. Evidence: `impulse.go`. -## Execution Modes +--- -| Entry point | Use case | Kubernetes workload | -| --- | --- | --- | -| `sdk.StartBatch[C, I]` | Finite tasks with clear start/end | Job | -| `sdk.StartStreaming[C]` | Continuous processing with gRPC bidirectional streaming | Deployment | -| `sdk.RunImpulse[C]` | Long-running trigger services that submit durable `StoryTrigger` requests | Deployment | +## 🚀 Quick Start -`sdk.StartStory(...)` remains the helper Impulses use to trigger workflows, but -the latest contract no longer creates `StoryRun` objects directly. The SDK now: +Let's build a simple batch engram that greets users. -1. submits a `StoryTrigger` -2. waits for controller resolution -3. returns the resolved `StoryRun` +### 1. Create your Go module -## Quick Start +```bash +mkdir hello-engram && cd hello-engram +go mod init github.com/yourusername/hello-engram +go get github.com/bubustack/bubu-sdk-go@latest +``` + +### 2. Write the code -Create a minimal batch Engram: +Create a single `main.go` file with all the necessary components. ```go package main @@ -61,206 +54,303 @@ import ( "context" "fmt" "log" - "os" sdk "github.com/bubustack/bubu-sdk-go" "github.com/bubustack/bubu-sdk-go/engram" ) +// Config holds static configuration from the Engram resource 'with' block. type Config struct { - DefaultMessage string `mapstructure:"defaultMessage"` + DefaultGreeting string `mapstructure:"defaultGreeting"` } +// Inputs holds runtime data passed to this execution via StepRun inputs. type Inputs struct { Name string `mapstructure:"name"` } -type HelloEngram struct { - message string +// GreeterEngram implements the engram.Batch interface. +type GreeterEngram struct { + greeting string } -func (e *HelloEngram) Init(ctx context.Context, cfg Config, secrets *engram.Secrets) error { - e.message = cfg.DefaultMessage - if e.message == "" { - e.message = "Hello" +// NewGreeter creates a new GreeterEngram. +func NewGreeter() *GreeterEngram { + return &GreeterEngram{} +} + +// Init is called once when the engram starts. +func (g *GreeterEngram) Init(ctx context.Context, cfg Config, secrets *engram.Secrets) error { + g.greeting = "Hello" + if cfg.DefaultGreeting != "" { + g.greeting = cfg.DefaultGreeting } return nil } -func (e *HelloEngram) Process(ctx context.Context, execCtx *engram.ExecutionContext, inputs Inputs) (*engram.Result, error) { +// Process is the core logic. It receives typed inputs and returns a Result. +func (g *GreeterEngram) Process(ctx context.Context, ec *engram.ExecutionContext, inputs Inputs) (*engram.Result, error) { if inputs.Name == "" { - return nil, fmt.Errorf("name is required") + return nil, fmt.Errorf("input 'name' is required") } + message := fmt.Sprintf("%s, %s!", g.greeting, inputs.Name) return engram.NewResultFrom(map[string]any{ - "message": fmt.Sprintf("%s, %s!", e.message, inputs.Name), - }), nil + "greeting": message, + }) } func main() { - if err := sdk.StartBatch(context.Background(), &HelloEngram{}); err != nil { - log.Printf("engram failed: %v", err) - os.Exit(sdk.BatchExitCode(err)) + if err := sdk.StartBatch(context.Background(), NewGreeter()); err != nil { + log.Fatalf("Engram failed: %v", err) } } ``` -Build it with: +### 3. Build the binary + +```bash +go build -o hello-engram . +``` + +### 4. Containerize and deploy + +Create a `Dockerfile`: + +```dockerfile +# Use a smaller base image +FROM golang:1.24-alpine AS builder +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download +COPY . . +# Build a static binary +RUN CGO_ENABLED=0 go build -o /hello-engram . + +# Final stage +FROM alpine:latest +# Add non-root user +RUN addgroup -S app && adduser -S app -G app +USER app +# Copy binary and certificates +COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ +COPY --from=builder /hello-engram /hello-engram +ENTRYPOINT ["/hello-engram"] +``` + +Build and push the image to your container registry: + +```bash +docker build -t myregistry.io/hello-engram:latest . +docker push myregistry.io/hello-engram:latest +``` + +### 5. Deploy to Kubernetes + +Create an `Engram` resource (`hello-engram.yaml`): + +```yaml +apiVersion: bubustack.io/v1alpha1 +kind: Engram +metadata: + name: hello-engram +spec: + image: myregistry.io/hello-engram:latest + with: + defaultGreeting: "Greetings" +``` + +Create a `Story` that uses the engram (`greet-story.yaml`): + +```yaml +apiVersion: bubustack.io/v1alpha1 +kind: Story +metadata: + name: greet-users +spec: + steps: + - name: greet + engram: hello-engram + with: + name: "{{ .inputs.userName }}" +``` + +Apply the resources and trigger a `StoryRun`: ```bash -go build ./... +kubectl apply -f hello-engram.yaml -f greet-story.yaml + +kubectl create -f - < BUBU_MAX_INLINE_SIZE + }, +}, nil +``` -## Secrets +### Streaming Pipelines -`engram.Secrets` is intentionally narrow. Prefer scoped accessors instead of -dumping the full secret map. +Build real-time data processing chains with metadata propagation for tracing: ```go -apiKey, ok := secrets.Get("apiKey") -all := secrets.GetAll() // returns a copy -names := secrets.Names() // sorted key names -subset := secrets.Select("apiKey") // bounded plaintext selection +func (s *Streamer) Stream(ctx context.Context, in <-chan engram.StreamMessage, out chan<- engram.StreamMessage) error { + for msg := range in { + // Process msg.Payload, propagate msg.Metadata for tracing + out <- engram.StreamMessage{ + Metadata: msg.Metadata, // Preserve trace IDs + Payload: processedData, + } + } + return nil +} ``` -Useful methods: +### Retries and Exit Codes -- `Get(key)` -- `GetAll()` -- `Names()` -- `Select(keys...)` +The SDK patches StepRun status with exit codes that inform the operator's retry policy: -## Streaming Notes +- **0**: Success +- **1**: Logic error (terminal, no retry) +- **124**: Timeout (retryable) -- Streaming Engrams receive `engram.InboundMessage`, not raw `StreamMessage`. -- Call `msg.Done()` after successful processing or intentional drop. -- Structured JSON streaming outputs should keep the canonical JSON in - `Payload` and mirror the same bytes into `Binary` with - `MimeType: application/json`. -- Use raw `Binary` without `Payload` only for opaque media or non-JSON blobs. -- Startup now requires connector readiness metadata before the SDK starts the - Engram stream loop. -- Startup capability negotiation uses strict latest-only - `startup.capabilities=required|none` metadata. +--- -See the full contract: https://bubustack.io/docs/streaming/streaming-contract +## 📚 Documentation -## Signals and Effects +- SDK: https://bubustack.io/docs/sdk +- Guides: https://bubustack.io/docs +- Reference: https://bubustack.io/docs/reference +- How-to: https://bubustack.io/docs/howto +- Troubleshooting: https://bubustack.io/docs/troubleshooting -- `sdk.EmitSignal(...)` records bounded progress/state data on the current StepRun. -- `sdk.RecordEffect(...)` appends an effect record to `StepRun.status.effects`. -- `sdk.ExecuteEffectOnce(...)` uses `EffectClaim` for cross-process reservation, - renewal, recovery, and completion. +--- -See: +## ✅ Support matrix -- https://bubustack.io/docs/overview/durable-semantics -- https://bubustack.io/docs/api/effect-claims +| Component | Version | +|-----------|---------| +| **Go** | 1.24+ | +| **Kubernetes** | 1.28+ (bobrapet operator compatibility) | +| **bobrapet operator** | v0.1.0+ | -## Testing +--- -The SDK ships with: +## ⚙️ Environment variables -- `testkit.BatchHarness` -- `testkit.StreamHarness` -- `conformance.BatchSuite` -- `conformance.StreamSuite` +The SDK is controlled entirely by environment variables injected by the bobrapet operator. See [docs/reference/config](https://bubustack.io/docs/reference/config) for the complete reference. -Run the standard quality gates with: +Key variables: -```bash -make test -make lint -``` +- `BUBU_STEP_TIMEOUT` — Batch execution timeout (default: 30m) +- `BUBU_STORAGE_PROVIDER` — Storage backend: `s3`, `file`, or unset (disabled) +- `BUBU_MAX_INLINE_SIZE` — Offload threshold in bytes (default: 1024) +- `BUBU_GRPC_PORT` — gRPC server port for streaming engrams (default: 50051) +- `BUBU_EXECUTION_MODE` — Set by operator: `batch` | `streaming` (evidence in controllers) +- `BUBU_STORAGE_PATH` — Required when `BUBU_STORAGE_PROVIDER=file`; base directory for file store +- `BUBU_MAX_RECURSION_DEPTH` — Max traversal depth for hydrate/dehydrate (default: 10) +- `BUBU_STORAGE_TIMEOUT` — Timeout for storage operations (default: 5m) -## Environment Variables - -The operator injects runtime configuration for components. Do not hard-code the -env var set in downstream components; use the SDK and `core/contracts` as the -source of truth. - -Common groups: - -- **Execution context** - - `BUBU_STORY_NAME` - - `BUBU_STORYRUN_ID` - - `BUBU_STEP_NAME` - - `BUBU_STEPRUN_NAME` - - `BUBU_STEP_TIMEOUT` - - `BUBU_MAX_INLINE_SIZE` -- **Config and templating** - - `BUBU_TRIGGER_DATA` - - `BUBU_STEP_CONFIG` - - `BUBU_TEMPLATE_CONTEXT` -- **Transport** - - `BUBU_TRANSPORT_BINDING` - - `BUBU_GRPC_PORT` - - `BUBU_GRPC_CHANNEL_BUFFER_SIZE` - - `BUBU_GRPC_CHANNEL_SEND_TIMEOUT` - - `BUBU_GRPC_MESSAGE_TIMEOUT` - - `BUBU_GRPC_HANG_TIMEOUT` - - `BUBU_GRPC_GRACEFUL_SHUTDOWN_TIMEOUT` - - `BUBU_GRPC_RECONNECT_MAX_RETRIES` -- **SDK observability** - - `BUBU_SDK_METRICS_ENABLED` - - `BUBU_SDK_TRACING_ENABLED` -- **Kubernetes client** - - `BUBU_K8S_USER_AGENT` - - `BUBU_K8S_TIMEOUT` - - `BUBU_K8S_OPERATION_TIMEOUT` - - `BUBU_K8S_PATCH_MAX_RETRIES` -- **Effects and signals** - - `BUBU_EFFECT_MAX_DETAILS_BYTES` - - `BUBU_SIGNAL_MAX_PAYLOAD_BYTES` - -Use the website docs for the curated reference: - -- https://bubustack.io/docs/sdk/go-sdk -- https://bubustack.io/docs/streaming/transport-settings - -## Local Development +--- -```bash -git clone https://github.com/bubustack/bubu-sdk-go.git -cd bubu-sdk-go -make test -make lint -``` +## 🛠️ Local Development + +1. **Clone the repository:** + ```bash + git clone https://github.com/bubustack/bubu-sdk-go.git + cd bubu-sdk-go + ``` + +2. **Run tests:** + ```bash + make test + ``` + +3. **Lint:** + ```bash + make lint + ``` -## Support, Security, and Changelog +4. **Run all checks:** + ```bash + make all + ``` -- Support: [SUPPORT.md](./SUPPORT.md) -- Security: [SECURITY.md](./SECURITY.md) -- Changelog: [CHANGELOG.md](./CHANGELOG.md) +--- -## License +## 📢 Support, Security, and Changelog + +- See `SUPPORT.md` for how to get help and report issues. +- See `SECURITY.md` for vulnerability reporting and security posture. +- See `CHANGELOG.md` for version history. + +## 🤝 Community + +- Code of Conduct: see [CODE_OF_CONDUCT.md](./CODE_OF_CONDUCT.md) (Contributor Covenant v3.0) + +## 📄 License Copyright 2025 BubuStack. -Licensed under the Apache License, Version 2.0. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. \ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md index f2a392e..5adfb3a 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,22 +2,9 @@ ## Supported versions -We currently support only the latest tagged pre-1.0 release line of -`bubu-sdk-go`. +We provide security updates for the latest released minor of the sdk. Please ensure you are using a supported version to receive security patches. We generally support the latest minor and the immediately previous minor. -| Version line | Status | Notes | -| --- | --- | --- | -| Latest tagged `0.x` release | Supported | Security fixes land here first while the SDK remains pre-1.0. | -| Older tagged releases | Unsupported | Upgrade to the newest tagged release before requesting a security fix. | -| Unreleased commits on `main` or feature branches | Unsupported | Security fixes are released through tagged versions, not guaranteed on arbitrary commits. | - -We aim to support Kubernetes N-2 relative to upstream stable releases, but this -repository does not yet publish a separate compatibility matrix. Treat the -latest release notes and currently tested CI workflows as the source of truth -for supported environments. - -We do not currently publish a PGP key for vulnerability intake. Use the GitHub -Security Advisory flow below so reports stay private. +Supported Kubernetes versions: we aim to support N-2 of upstream stable releases. For example, when Kubernetes 1.31 is current, we target 1.31, 1.30, 1.29. ## Reporting a vulnerability @@ -33,7 +20,7 @@ When reporting a vulnerability, please provide the following information: - **A clear description** of the vulnerability and its potential impact. - **Steps to reproduce** the vulnerability, including any example code, scripts, or configurations. -- **The version(s) of the SDK** affected. +- **The version(s) of the operator** affected. - **Your contact information** for us to follow up with you. ## Disclosure process @@ -44,6 +31,6 @@ When reporting a vulnerability, please provide the following information: 4. **Fix**: We will develop a patch for the vulnerability. 5. **Disclosure**: We will create a security advisory, issue a CVE (if applicable), and release a new version with the patch. We will credit you for your discovery unless you prefer to remain anonymous. -We aim to resolve high severity vulnerabilities within 30 days, medium within -60 days, and low within 90 days, subject to complexity and scope. We'll keep -you informed of progress. +We aim to resolve high severity vulnerabilities within 30 days, medium within 60 days, and low within 90 days, subject to complexity and scope. We'll keep you informed of progress. + +We aim to resolve all vulnerabilities as quickly as possible. The timeline for a fix and disclosure will vary depending on the complexity and severity of the vulnerability. We will keep you informed of our progress throughout the process. diff --git a/SUPPORT.md b/SUPPORT.md index 28004ef..2f3cb42 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -5,7 +5,7 @@ Thank you for using BubuStack. Here's how you can get help. ## Self-Service - **Documentation**: Our documentation is the best place to start. Find guides, how‑to articles, and references in the `website` repository: https://bubustack.io/docs -- **Roadmap**: https://bubustack.io/docs/community/roadmap +- **Troubleshooting**: For common issues, please see our [`troubleshooting`](https://bubustack.io/docs/troubleshooting) guide. - **Examples**: We provide a collection of example engrams and impulses in the [`BubuStack GitHub organization`](https://github.com/bubustack). ## Community support @@ -13,19 +13,22 @@ Thank you for using BubuStack. Here's how you can get help. For questions, discussions, and community support, please use the following channels: - **GitHub Issues**: For bug reports and feature requests, please open an issue: https://github.com/bubustack/bubu-sdk-go/issues -- **GitHub Discussions**: For general questions and sharing your projects, please use Discussions: https://github.com/orgs/bubustack/discussions -- **Discord**: https://discord.gg/dysrB7D8H6 +- **GitHub Discussions**: For general questions and sharing your projects, please use Discussions: https://github.com/bubustack/bubu-sdk-go/discussions -### Triage and response expectations +### Triage and response SLAs (best effort) -- We review issues and discussions on a best-effort basis. -- We do not currently provide guaranteed response times or commercial SLAs. -- Security reports follow the process in SECURITY.md. +- We triage new GitHub issues Mon–Fri during business hours. +- Initial response target: within 2 business days. +- Security reports follow the timelines in SECURITY.md. ### Supported versions -- We support only the latest tagged release line of the SDK while it remains pre-1.0. -- Kubernetes compatibility target (via the bobrapet operator): N-2 upstream stable releases. +- We generally support the latest minor release and the previous minor release of the operator. +- Kubernetes compatibility target: N-2 upstream stable releases. + +## Commercial support + +For commercial support, including enterprise features, dedicated support channels, and SLAs, contact [sales@bubustack.com](mailto:sales@bubustack.com). ## Reporting security vulnerabilities @@ -33,4 +36,5 @@ To report a security vulnerability, please follow the instructions in our [Secur ### Related documentation +- Troubleshooting: https://bubustack.io/docs/troubleshooting - Known issues: https://github.com/bubustack/bubu-sdk-go/issues?q=is%3Aissue+is%3Aopen+label%3Abug diff --git a/batch.go b/batch.go index d67f870..d1e7f4a 100644 --- a/batch.go +++ b/batch.go @@ -1,111 +1,50 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package sdk import ( "context" "encoding/json" - "errors" "fmt" "log/slog" "os" - "regexp" "strings" "time" - "unicode/utf8" runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/conditions" "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/storage" "github.com/bubustack/bubu-sdk-go/engram" "github.com/bubustack/bubu-sdk-go/k8s" - "github.com/bubustack/bubu-sdk-go/pkg/env" - "github.com/bubustack/bubu-sdk-go/pkg/observability" "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/bubustack/core/contracts" + "github.com/bubustack/bubu-sdk-go/storage" + "go.opentelemetry.io/otel" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sruntime "k8s.io/apimachinery/pkg/runtime" ) -const ( - // ReasonTimeout indicates the step failed because it exceeded its deadline. - ReasonTimeout = "Timeout" -) - -// exitProcess allows tests to override how the SDK terminates on fatal errors. -var exitProcess = os.Exit - // getStepTimeout returns the timeout for batch step execution from env or default func getStepTimeout() time.Duration { - return env.GetDuration(contracts.StepTimeoutEnv, 30*time.Minute) + if v := os.Getenv("BUBU_STEP_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return 30 * time.Minute // Default: 30 minutes } // RunBatch is the primary entry point for a BatchEngram. It provides a fully // type-safe execution environment, handling all the boilerplate of context loading, // data hydration, and status patching. func RunBatch[C any, I any](ctx context.Context, e engram.BatchEngram[C, I]) error { - ctx, _ = withDefaultLogger(ctx) - defer publishCapturedLogs(ctx) - logger := LoggerFromContext(ctx) - - execCtxData, err := runtime.LoadExecutionContextData() + sm, err := storage.NewManager(ctx) if err != nil { - logger.Error("Failed to load execution context", "error", err) - return fmt.Errorf("failed to load execution context: %w", err) + return fmt.Errorf("failed to create storage manager: %w", err) } k8sClient, err := k8s.NewClient() if err != nil { - logger.Error("Failed to create Kubernetes client", "error", err) - return patchBootstrapFailure(ctx, execCtxData, nil, fmt.Errorf("failed to create k8s client: %w", err)) + return fmt.Errorf("failed to create k8s client: %w", err) } - sm, err := storage.SharedManager(ctx) - if err != nil { - logger.Error("Failed to create storage manager", "error", err) - return patchBootstrapFailure(ctx, execCtxData, k8sClient, fmt.Errorf("failed to create storage manager: %w", err)) - } - - return runWithClientsWithContext[C, I](ctx, e, k8sClient, sm, execCtxData) -} - -func patchBootstrapFailure( - ctx context.Context, - execCtxData *runtime.ExecutionContextData, - k8sClient K8sClient, - cause error, -) error { - client := k8sClient - if client == nil { - var err error - client, err = k8s.SharedClient() - if err != nil { - return fmt.Errorf("%w (status patch failed: %v)", cause, err) - } - } - exitCode := BatchExitCode(cause) - if exitCode == 0 { - exitCode = 1 - } - if patchErr := patchFailureStatus(ctx, client, execCtxData, cause, exitCode); patchErr != nil { - return fmt.Errorf("%w (status patch also failed: %v)", cause, patchErr) - } - return cause + return runWithClients[C, I](ctx, e, k8sClient, sm) } // runWithClients contains the core logic for a batch execution, using injected clients. @@ -119,24 +58,10 @@ func runWithClients[C any, I any]( if err != nil { return fmt.Errorf("failed to load execution context: %w", err) } - return runWithClientsWithContext(ctx, e, k8sClient, sm, execCtxData) -} - -func runWithClientsWithContext[C any, I any]( - ctx context.Context, - e engram.BatchEngram[C, I], - k8sClient K8sClient, - sm StorageManager, - execCtxData *runtime.ExecutionContextData, -) error { - if execCtxData == nil { - return fmt.Errorf("execution context data cannot be nil") - } logger := LoggerFromContext(ctx) - logExecutionContextDebug(logger, execCtxData) - tracer := observability.Tracer("bubu-sdk") - execCtx := engram.NewExecutionContextWithCELContext(logger, tracer, execCtxData.StoryInfo, execCtxData.CELContext) + tracer := otel.Tracer("bubu-sdk") + execCtx := engram.NewExecutionContext(logger, tracer, execCtxData.StoryInfo) // Enforce timeout on batch execution to prevent runaway engrams // This ensures engrams receive context cancellation before Job-level SIGKILL @@ -149,11 +74,8 @@ func runWithClientsWithContext[C any, I any]( "stepRunID", execCtxData.StoryInfo.StepRunID) // Initialize the engram. - if err := initializeEngram[C, I](ctxWithTimeout, e, sm, execCtxData); err != nil { - logger.Error("Engram initialization failed", "error", err) - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { - return fmt.Errorf("%w (status patch also failed: %v)", err, patchErr) - } + if err := initializeEngram[C, I](ctxWithTimeout, e, execCtxData); err != nil { + // We can't patch status if init fails, so just return the error. return err } @@ -162,42 +84,27 @@ func runWithClientsWithContext[C any, I any]( if err != nil { return err } - logTypedInputs(logger, inputs) - - var terminateOverride *statusOverride // Process the inputs. - result, processErr := callWithPanicRecovery[*engram.Result]("engram Process", func() (*engram.Result, error) { - return e.Process(ctxWithTimeout, execCtx, inputs) - }) - logProcessResult(logger, result, processErr) + result, processErr := e.Process(ctxWithTimeout, execCtx, inputs) // Check if timeout was hit - timedOut := ctxWithTimeout.Err() == context.DeadlineExceeded - var timeoutErr *BatchTimeoutError + timedOut := (processErr != nil && ctxWithTimeout.Err() == context.DeadlineExceeded) if timedOut { - originalErr := processErr logger.Error("Batch execution timed out", "timeout", stepTimeout, - "stepRunID", execCtxData.StoryInfo.StepRunID, - "error", originalErr, - ) - timeoutErr = &BatchTimeoutError{ - Timeout: stepTimeout, - Cause: originalErr, - } - processErr = timeoutErr + "stepRunID", execCtxData.StoryInfo.StepRunID) + processErr = fmt.Errorf("execution timed out after %v: %w", stepTimeout, processErr) } // Handle the result and patch the final status. // Use original context (not timed-out) for status patch to ensure it completes // even if engram execution exceeded timeout. patchStart := time.Now() - succeeded, outputBytes, finalResultErr, patchErr := handleResultAndPatchStatus( + succeeded, outputBytes, patchErr := handleResultAndPatchStatus( ctx, sm, k8sClient, execCtxData, - result, processErr, timedOut, terminateOverride, + result, processErr, timedOut, ) - processErr = finalResultErr patchDuration := time.Since(patchStart) // Critical: If timeout occurred, enforce process termination after status patch completes @@ -206,38 +113,33 @@ func runWithClientsWithContext[C any, I any]( // Exit code 124 follows GNU timeout convention and allows retry policies to // differentiate timeouts from other failure modes. if timedOut { - if timeoutErr == nil { - timeoutErr = &BatchTimeoutError{Timeout: stepTimeout, Cause: processErr} - } - - var exitErr error = timeoutErr - exitCode := BatchExitCode(exitErr) - if patchErr == nil { - logger.Info("Timeout exceeded and status patched; exiting", - "timeout", stepTimeout, - "stepRunID", execCtxData.StoryInfo.StepRunID, - "patchDuration", patchDuration, - "exitCode", exitCode, - ) + // Patch succeeded; wait for remainder of grace period to allow log flush and propagation + // Grace period: max(5s, 10% of remaining time after patch) + minGrace := 5 * time.Second + remainingGrace := minGrace - patchDuration + if remainingGrace > 0 { + logger.Info("Timeout exceeded and status patched; sleeping before exit", + "timeout", stepTimeout, + "stepRunID", execCtxData.StoryInfo.StepRunID, + "patchDuration", patchDuration, + "remainingGrace", remainingGrace, + "exitCode", 124) + time.Sleep(remainingGrace) + } } else { - exitErr = errors.Join(timeoutErr, patchErr) - exitCode = BatchExitCode(exitErr) - logger.Error("Timeout exceeded and status patch failed; exiting", + // Patch failed; log error but still exit to prevent zombie + logger.Error("Timeout exceeded and status patch failed; forcing exit", "timeout", stepTimeout, "stepRunID", execCtxData.StoryInfo.StepRunID, "patchErr", patchErr, - "exitCode", exitCode, - ) + "exitCode", 124) } - - publishCapturedLogs(ctx) - exitProcess(exitCode) - return exitErr + os.Exit(124) // Exit code 124: timeout (GNU timeout convention) } if patchErr != nil { - return processErr + return patchErr } // Hybrid bridge: on successful batch execution, optionally forward output to Hub @@ -246,7 +148,7 @@ func runWithClientsWithContext[C any, I any]( bTimeout := getBridgeTimeout() bCtx, cancel := context.WithTimeout(ctx, bTimeout) defer cancel() - if err := bridgeToHub(bCtx, outputBytes, execCtxData); err != nil { + if err := bridgeToHub(bCtx, outputBytes); err != nil { logger.Warn("Hybrid bridge (batch→stream) send failed; continuing without downstream delivery", "error", err) } else { logger.Info("Hybrid bridge (batch→stream) delivered output to Hub") @@ -262,24 +164,14 @@ func initializeEngram[ ]( ctx context.Context, e engram.BatchEngram[C, I], - sm StorageManager, execCtxData *runtime.ExecutionContextData, ) error { - configMap, err := hydrateConfig(ctx, sm, execCtxData.Config, execCtxData.CELContext) - if err != nil { - return fmt.Errorf("failed to hydrate config: %w", err) - } - config, err := runtime.UnmarshalFromMap[C](configMap) + config, err := runtime.UnmarshalFromMap[C](execCtxData.Config) if err != nil { return fmt.Errorf("failed to unmarshal config: %w", err) } - secrets, err := engram.NewSecretsWithError(ctx, execCtxData.Secrets) - if err != nil { - return fmt.Errorf("failed to expand secrets: %w", err) - } - if err := callWithPanicRecoveryNoValue("engram Init", func() error { - return e.Init(ctx, config, secrets) - }); err != nil { + secrets := engram.NewSecrets(execCtxData.Secrets) + if err := e.Init(ctx, config, secrets); err != nil { return fmt.Errorf("engram initialization failed: %w", err) } return nil @@ -304,7 +196,7 @@ func dehydrateWithRetry( backoff := 1 * time.Second var lastErr error - for attempt := range maxRetries { + for attempt := 0; attempt < maxRetries; attempt++ { if attempt > 0 { logger.Warn("Retrying dehydration after failure", "attempt", attempt, @@ -349,514 +241,119 @@ func dehydrateWithRetry( ) } -type statusOverride struct { - phase enums.Phase - exitCode int32 - exitClass enums.ExitClass - lastFailureMsg string - failureErr error -} - // handleResultAndPatchStatus determines the final phase from the Engram's result, // and patches the StepRun status accordingly. It returns a wrapped error if // processing or patching fails. Phase determination happens after dehydration // to ensure storage failures are reflected in both phase and container exit code. func handleResultAndPatchStatus( - ctx context.Context, - sm StorageManager, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - result *engram.Result, - processErr error, - timedOut bool, - override *statusOverride, -) (bool, []byte, error, error) { + ctx context.Context, sm StorageManager, k8sClient K8sClient, execCtxData *runtime.ExecutionContextData, + result *engram.Result, processErr error, timedOut bool, +) (bool, []byte, error) { logger := LoggerFromContext(ctx) - outputBytes, dehydrationErr := attemptResultDehydration(ctx, sm, execCtxData, result, logger) - - // Apply the nil-output fallback early so schema validation sees the final value. - if processErr == nil && dehydrationErr == nil && len(outputBytes) == 0 { - outputBytes = []byte("{}") - } - - if processErr == nil && dehydrationErr == nil { - if err := validateBatchOutputs(ctx, k8sClient, execCtxData, outputBytes); err != nil { - processErr = err - } - } - - phase, finalErr := initialPhaseAndError(processErr, override) - phase, finalErr = applyDehydrationOutcome(phase, finalErr, dehydrationErr) - - if timedOut { - phase = enums.PhaseTimeout - } - - status := newStepRunStatus(execCtxData, phase, timedOut, finalErr) - applyStatusOverride(&status, override, processErr, timedOut, phase, finalErr) - // Always set Output when phase is Succeeded so downstream steps can reference .steps["x"].output. - appendOutput(&status, outputBytes) - - outputLen := 0 - if outputBytes != nil { - outputLen = len(outputBytes) - } - logger.Info("Patching StepRun status", - "phase", phase, - "stepRunID", execCtxData.StoryInfo.StepRunID, - "outputBytes", outputLen, - ) - if isDebugEnabled() { - logger.Debug("StepRun status patch detail", - slog.String("phase", string(phase)), - slog.Bool("timedOut", timedOut), - debugBytesAttr("outputPreview", outputBytes), - ) - } - - patchErr := k8sClient.PatchStepRunStatus(ctx, execCtxData.StoryInfo.StepRunID, status) - if patchErr != nil { - logger.Error("status patch failed after processing", "patchErr", patchErr) - return false, nil, combinePatchError(finalErr, patchErr), patchErr - } - - if isDebugEnabled() { - logger.Debug("StepRun status patch applied", - slog.String("stepRun", execCtxData.StoryInfo.StepRunID), - slog.String("phase", string(phase)), - ) - } - - return phase == enums.PhaseSucceeded, outputBytes, finalErr, nil -} - -func initialPhaseAndError(processErr error, override *statusOverride) (enums.Phase, error) { + // Determine initial phase based on processing result phase := enums.PhaseSucceeded finalErr := processErr - if processErr != nil { phase = enums.PhaseFailed } - if override != nil && processErr == nil { - phase = override.phase - if override.failureErr != nil { - finalErr = override.failureErr - } - } - - return phase, finalErr -} - -func attemptResultDehydration( - ctx context.Context, - sm StorageManager, - execCtxData *runtime.ExecutionContextData, - result *engram.Result, - logger *slog.Logger, -) ([]byte, error) { - stepStorageKey := storage.NamespacedKey(execCtxData.StoryInfo.StepRunNamespace, execCtxData.StoryInfo.StepRunID) - if schemaID, schemaVersion := outputSchemaMetadata(execCtxData); schemaID != "" || schemaVersion != "" { - ctx = storage.WithStorageSchema(ctx, schemaID, schemaVersion) - } - - var data any + // Attempt dehydration with fallback + var dataToDehydrate any if result != nil { - data = result.Data - } - if data == nil { - logger.Warn("Engram result or result.Data is nil; StepRun status will have no output", - "stepRunID", execCtxData.StoryInfo.StepRunID, - "resultNil", result == nil, - "dataNil", data == nil, - ) - } - - return dehydrateWithRetry(ctx, sm, data, stepStorageKey, logger) -} - -func outputSchemaMetadata(execCtxData *runtime.ExecutionContextData) (string, string) { - if execCtxData == nil { - return "", "" - } - namespace := strings.TrimSpace(execCtxData.StoryInfo.StepRunNamespace) - storyName := strings.TrimSpace(execCtxData.StoryInfo.StoryName) - stepName := strings.TrimSpace(execCtxData.StoryInfo.StepName) - engramName := strings.TrimSpace(os.Getenv(contracts.EngramNameEnv)) - - var schema string - switch { - case namespace != "" && engramName != "": - schema = fmt.Sprintf("bubu://engram/%s/%s/output", namespace, engramName) - case namespace != "" && storyName != "" && stepName != "": - schema = fmt.Sprintf("bubu://story/%s/%s/steps/%s/output", namespace, storyName, stepName) - case storyName != "" && stepName != "": - schema = fmt.Sprintf("bubu://story/%s/steps/%s/output", storyName, stepName) - } - - schemaVersion := strings.TrimSpace(os.Getenv(contracts.EngramVersionEnv)) - if schemaVersion == "" { - schemaVersion = strings.TrimSpace(os.Getenv(contracts.StoryVersionEnv)) + dataToDehydrate = result.Data } - return schema, schemaVersion -} + outputBytes, dehydrationErr := dehydrateWithRetry( + ctx, sm, dataToDehydrate, execCtxData.StoryInfo.StepRunID, logger, + ) -func applyDehydrationOutcome(phase enums.Phase, finalErr error, dehydrationErr error) (enums.Phase, error) { - if dehydrationErr == nil { - return phase, finalErr + // Handle dehydration failures - always fail the step if storage is unavailable + if dehydrationErr != nil { + phase = enums.PhaseFailed + finalErr = combineErrors(finalErr, dehydrationErr) } - return enums.PhaseFailed, combineErrors(finalErr, dehydrationErr) -} -func newStepRunStatus( - execCtxData *runtime.ExecutionContextData, - phase enums.Phase, - timedOut bool, - finalErr error, -) runsv1alpha1.StepRunStatus { + // Build status patch finishedAt := metav1.Now() - status := runsv1alpha1.StepRunStatus{ + newStatus := runsv1alpha1.StepRunStatus{ Phase: phase, FinishedAt: &finishedAt, Duration: finishedAt.Sub(execCtxData.StartedAt.Time).String(), } - applyReadyCondition(&status, phase, timedOut, finalErr) - applyExitMetadata(&status, phase, timedOut, finalErr) - appendStructuredError(&status, phase, timedOut, finalErr) - - return status -} - -// appendStructuredError populates status.error with machine-readable error details -// when the step reaches a failure phase. The structured format uses a stable schema -// so downstream consumers (dashboards, alerting, CLI) can parse errors programmatically. -func appendStructuredError(status *runsv1alpha1.StepRunStatus, phase enums.Phase, timedOut bool, finalErr error) { - if phase == enums.PhaseSucceeded || finalErr == nil { - return - } - - errType := classifyError(finalErr) - - errObj := runsv1alpha1.StructuredError{ - Version: runsv1alpha1.StructuredErrorVersionV1, - Type: errType, - Message: sanitizePersistedErrorMessage(finalErr.Error(), maxErrorMessageBytes), - } - - if provided, ok := structuredErrorFrom(finalErr); ok { - errObj = mergeStructuredError(errObj, provided) - } - if timedOut { - errObj.Type = runsv1alpha1.StructuredErrorTypeTimeout - } - errObj.Message = sanitizePersistedErrorMessage(errObj.Message, maxErrorMessageBytes) - if status.ExitCode != 0 { - exitCode := status.ExitCode - errObj.ExitCode = &exitCode - } - if status.ExitClass != "" { - errObj.ExitClass = runsv1alpha1.StructuredErrorExitClass(status.ExitClass) - if errObj.Retryable == nil { - retryable := isRetryableExitClass(status.ExitClass) - errObj.Retryable = &retryable - } - } - status.Error = &errObj -} - -// maxErrorMessageBytes is the maximum byte length of error messages stored in -// StepRun status fields (LastFailureMsg, StructuredError.Message, condition -// Message). K8s etcd rejects objects larger than 1.5MB; capping messages at -// 8 KiB prevents bloated status objects and avoids etcd write failures when -// an error message contains a very large payload (e.g. deserialization errors -// that echo back the full input). -const maxErrorMessageBytes = 8192 - -var persistedErrorRedactionPatterns = []struct { - re *regexp.Regexp - replacement string -}{ - { - re: regexp.MustCompile(`(?i)(authorization\s*:\s*(?:basic|bearer)\s+)([^\s,;]+)`), - replacement: `${1}[REDACTED]`, - }, - { - re: regexp.MustCompile( - `(?i)(["']?(?:api[_-]?key|x-api-key|access[_-]?token|refresh[_-]?token|token|password|secret|client[_-]?secret|authorization)["']?\s*[:=]\s*["']?)([^"'\\\s,;}{\]&]+)(["']?)`, //nolint:lll - ), - replacement: `${1}[REDACTED]${3}`, - }, -} - -// truncateErrorMessage caps msg at limit bytes while preserving valid UTF-8. -// The limit must be > 0; if it is not positive, msg is returned unchanged. -func truncateErrorMessage(msg string, limit int) string { - if limit <= 0 || len(msg) <= limit { - return msg - } - // Walk backwards from the limit to find a valid UTF-8 boundary. - truncated := msg[:limit] - for len(truncated) > 0 && !utf8.ValidString(truncated) { - truncated = truncated[:len(truncated)-1] - } - return truncated -} - -func sanitizePersistedErrorMessage(msg string, limit int) string { //nolint:unparam - sanitized := msg - for _, pattern := range persistedErrorRedactionPatterns { - sanitized = pattern.re.ReplaceAllString(sanitized, pattern.replacement) - } - return truncateErrorMessage(sanitized, limit) -} - -// classifyError determines a machine-readable error type from the error chain. -// IMPORTANT: The cases are order-dependent — more specific patterns must appear -// before broader ones (e.g. "storage" before the default "execution" fallback). -// The StructuredErrorProvider interface allows engrams to bypass string matching -// entirely; prefer implementing that interface over relying on this function. -func classifyError(err error) runsv1alpha1.StructuredErrorType { - if err == nil { - return runsv1alpha1.StructuredErrorTypeUnknown - } - msg := err.Error() - switch { - case strings.Contains(msg, "dehydrat") || strings.Contains(msg, "storage"): - return runsv1alpha1.StructuredErrorTypeStorage - case strings.Contains(msg, "unmarshal") || strings.Contains(msg, "marshal"): - return runsv1alpha1.StructuredErrorTypeSerialization - case strings.Contains(msg, "schema") || strings.Contains(msg, "validation"): - return runsv1alpha1.StructuredErrorTypeValidation - case strings.Contains(msg, "initialization") || strings.Contains(msg, "Init"): - return runsv1alpha1.StructuredErrorTypeInitialization - default: - return runsv1alpha1.StructuredErrorTypeExecution - } -} - -func isRetryableExitClass(exitClass enums.ExitClass) bool { - switch exitClass { - case enums.ExitClassRetry, enums.ExitClassRateLimited: - return true - default: - return false - } -} - -func structuredErrorFrom(err error) (runsv1alpha1.StructuredError, bool) { - if err == nil { - return runsv1alpha1.StructuredError{}, false - } - var provider StructuredErrorProvider - if errors.As(err, &provider) { - return provider.StructuredError(), true - } - return runsv1alpha1.StructuredError{}, false -} - -func mergeStructuredError(base, override runsv1alpha1.StructuredError) runsv1alpha1.StructuredError { - if override.Version != "" { - base.Version = override.Version - } - if override.Type != "" { - base.Type = override.Type - } - if override.Message != "" { - base.Message = override.Message - } - if override.Retryable != nil { - base.Retryable = override.Retryable - } - if override.ExitClass != "" { - base.ExitClass = override.ExitClass - } - if override.Code != "" { - base.Code = override.Code - } - if override.Details != nil { - base.Details = cloneRawExtension(override.Details) - } - return base -} - -func cloneRawExtension(src *k8sruntime.RawExtension) *k8sruntime.RawExtension { - if src == nil || len(src.Raw) == 0 { - return nil - } - return &k8sruntime.RawExtension{Raw: append([]byte(nil), src.Raw...)} -} - -func overrideExitClassFromError(err error) (enums.ExitClass, bool) { - if err == nil { - return "", false - } - serr, ok := structuredErrorFrom(err) - if !ok { - return "", false - } - if serr.ExitClass != "" { - if parsed, ok := parseExitClass(string(serr.ExitClass)); ok { - return parsed, true - } - } - if serr.Retryable != nil { - if *serr.Retryable { - return enums.ExitClassRetry, true - } - return enums.ExitClassTerminal, true - } - return "", false -} - -func parseExitClass(value string) (enums.ExitClass, bool) { - switch strings.ToLower(strings.TrimSpace(value)) { - case string(enums.ExitClassSuccess): - return enums.ExitClassSuccess, true - case string(enums.ExitClassRetry): - return enums.ExitClassRetry, true - case string(enums.ExitClassTerminal): - return enums.ExitClassTerminal, true - case string(enums.ExitClassRateLimited): - return enums.ExitClassRateLimited, true - default: - return "", false - } -} - -func applyReadyCondition(status *runsv1alpha1.StepRunStatus, phase enums.Phase, timedOut bool, finalErr error) { + // Set exit code and class for operator retry policy classification + // This allows intelligent retry decisions (timeout=124 → retry, logic error=1 → terminal) if phase == enums.PhaseSucceeded { - setCondition( - status, - conditions.ConditionReady, - metav1.ConditionTrue, - conditions.ReasonCompleted, - "Step completed successfully", - ) - return + newStatus.ExitCode = 0 + newStatus.ExitClass = enums.ExitClassSuccess + } else { + // Failed: distinguish timeout (124, retryable) from logic errors (1, terminal) + if timedOut { + newStatus.ExitCode = 124 // GNU timeout convention + newStatus.ExitClass = enums.ExitClassRetry + } else { + newStatus.ExitCode = 1 // General application error + newStatus.ExitClass = enums.ExitClassTerminal + } } - reason := conditions.ReasonExecutionFailed - if timedOut { - reason = ReasonTimeout + if len(outputBytes) > 0 { + newStatus.Output = &k8sruntime.RawExtension{Raw: outputBytes} } - errMsg := "Step failed" if finalErr != nil { - errMsg = sanitizePersistedErrorMessage(finalErr.Error(), maxErrorMessageBytes) - } - setCondition(status, conditions.ConditionReady, metav1.ConditionFalse, reason, errMsg) -} - -func applyExitMetadata(status *runsv1alpha1.StepRunStatus, phase enums.Phase, timedOut bool, finalErr error) { - if phase == enums.PhaseSucceeded { - status.ExitCode = 0 - status.ExitClass = enums.ExitClassSuccess - return + newStatus.LastFailureMsg = finalErr.Error() } - if timedOut { - status.ExitCode = 124 - status.ExitClass = enums.ExitClassRetry - return - } - - status.ExitCode = 1 - status.ExitClass = enums.ExitClassTerminal - if override, ok := overrideExitClassFromError(finalErr); ok { - status.ExitClass = override - } -} - -func applyStatusOverride( - status *runsv1alpha1.StepRunStatus, - override *statusOverride, - processErr error, - timedOut bool, - phase enums.Phase, - finalErr error, -) { - if override != nil && processErr == nil && !timedOut { - if override.exitClass != "" { - status.ExitClass = override.exitClass - } - if override.exitCode != 0 || override.phase != enums.PhaseSucceeded { - status.ExitCode = override.exitCode - } - if override.lastFailureMsg != "" { - status.LastFailureMsg = sanitizePersistedErrorMessage(override.lastFailureMsg, maxErrorMessageBytes) - } else if override.failureErr == nil && phase == enums.PhaseSucceeded { - status.LastFailureMsg = "" + // Patch StepRun status + patchErr := k8sClient.PatchStepRunStatus(ctx, execCtxData.StoryInfo.StepRunID, newStatus) + if patchErr != nil { + logger.Error("status patch failed after successful processing", + "patchErr", patchErr, + ) + if finalErr != nil { + return false, nil, fmt.Errorf("engram completed with error: %w (status patch also failed: %v)", + finalErr, patchErr, + ) } - return - } - - if finalErr != nil { - status.LastFailureMsg = sanitizePersistedErrorMessage(finalErr.Error(), maxErrorMessageBytes) - } -} - -func appendOutput(status *runsv1alpha1.StepRunStatus, outputBytes []byte) { - if len(outputBytes) > 0 { - status.Output = &k8sruntime.RawExtension{Raw: outputBytes} + return false, nil, fmt.Errorf("engram completed successfully but status patch failed: %w", + patchErr, + ) } -} -func combinePatchError(finalErr error, patchErr error) error { - if finalErr != nil { - return fmt.Errorf("engram completed with error: %w (status patch also failed: %v)", finalErr, patchErr) - } - return fmt.Errorf("engram completed successfully but status patch failed: %w", patchErr) + // Return final error (if any) for container exit code + return phase == enums.PhaseSucceeded, outputBytes, finalErr } // patchFailureStatus builds and sends a failure status patch with consistent fields. func patchFailureStatus( - ctx context.Context, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - err error, - exitCode int, + ctx context.Context, k8sClient K8sClient, execCtxData *runtime.ExecutionContextData, + err error, exitCode int, exitClass enums.ExitClass, ) error { finishedAt := metav1.Now() - exitClass := enums.ExitClassTerminal - if override, ok := overrideExitClassFromError(err); ok { - exitClass = override - } - errMsg := sanitizePersistedErrorMessage(err.Error(), maxErrorMessageBytes) status := runsv1alpha1.StepRunStatus{ Phase: enums.PhaseFailed, FinishedAt: &finishedAt, Duration: finishedAt.Sub(execCtxData.StartedAt.Time).String(), - LastFailureMsg: errMsg, + LastFailureMsg: err.Error(), ExitCode: int32(exitCode), ExitClass: exitClass, } - setCondition(&status, conditions.ConditionReady, metav1.ConditionFalse, conditions.ReasonExecutionFailed, errMsg) - appendStructuredError(&status, enums.PhaseFailed, false, err) return k8sClient.PatchStepRunStatus(ctx, execCtxData.StoryInfo.StepRunID, status) } // hydrateAndUnmarshalInputs hydrates raw inputs and unmarshals them into the target type I. // On failure, it patches StepRun status and returns the error. -func hydrateAndUnmarshalInputs[C any, I any]( //nolint:gocyclo +func hydrateAndUnmarshalInputs[C any, I any]( ctx context.Context, sm StorageManager, k8sClient K8sClient, execCtxData *runtime.ExecutionContextData, ) (I, error) { var zero I - logger := LoggerFromContext(ctx) // Attach StepRunID to context for hydration metrics attribution. - stepStorageKey := storage.NamespacedKey(execCtxData.StoryInfo.StepRunNamespace, execCtxData.StoryInfo.StepRunID) - hctx := storage.WithStepRunID(ctx, stepStorageKey) - - // Record which input paths contain storage refs before hydration. - // After hydration these become raw user data that may contain literal - // "{{ ... }}" strings — must not be template-evaluated. - storageRefPaths := findStorageRefPaths(execCtxData.Inputs) - + hctx := storage.WithStepRunID(ctx, execCtxData.StoryInfo.StepRunID) hydratedInputs, err := sm.Hydrate(hctx, execCtxData.Inputs) if err != nil { err = fmt.Errorf("failed to hydrate inputs: %w", err) - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { + if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1, enums.ExitClassTerminal); patchErr != nil { return zero, fmt.Errorf("engram failed during input hydration: %w (status patch also failed: %v)", err, patchErr, ) @@ -864,72 +361,20 @@ func hydrateAndUnmarshalInputs[C any, I any]( //nolint:gocyclo return zero, err } - if hydratedInputs == nil { - hydratedInputs = map[string]any{} - } - // Defensive: ensure map for unmarshal rawMap, ok := hydratedInputs.(map[string]any) if !ok { err = fmt.Errorf("hydrated inputs have unexpected type %T (want map[string]any)", hydratedInputs) - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { + if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1, enums.ExitClassTerminal); patchErr != nil { return zero, fmt.Errorf("engram failed during input validation: %w (status patch also failed: %v)", err, patchErr) } return zero, err } - if rawMap == nil { - rawMap = map[string]any{} - } - - // Extract storage-ref-hydrated values before template evaluation so - // raw user data (e.g. RSS bodies with literal {{ }}) is never parsed - // as Go templates. - extractedInputs := extractPaths(rawMap, storageRefPaths) - if len(extractedInputs) > 0 { - logger.Debug("Extracted storage-ref input values from template evaluation", - slog.Int("count", len(extractedInputs)), - ) - } - - resolvedInputs := any(rawMap) - if !skipInputTemplating() { - var err error - resolvedInputs, err = resolveCELTemplates(ctx, logger, sm, execCtxData.CELContext, rawMap) - if err != nil { - err = fmt.Errorf("failed to resolve input templates: %w", err) - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { - return zero, fmt.Errorf("engram failed during input template resolution: %w (status patch also failed: %v)", err, - patchErr) - } - return zero, err - } - if resolvedInputs == nil { - resolvedInputs = map[string]any{} - } - } - rawMap, ok = resolvedInputs.(map[string]any) - if !ok { - err = fmt.Errorf("resolved inputs have unexpected type %T (want map[string]any)", resolvedInputs) - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { - return zero, fmt.Errorf("engram failed during input template validation: %w (status patch also failed: %v)", err, patchErr) //nolint:lll - } - return zero, err - } - - // Restore extracted storage-ref values into the resolved inputs. - restorePaths(rawMap, extractedInputs) - - if err := validateBatchInputs(ctx, k8sClient, execCtxData, rawMap); err != nil { - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { - return zero, fmt.Errorf("engram failed during input schema validation: %w (status patch also failed: %v)", err, patchErr) //nolint:lll - } - return zero, err - } inputs, err := runtime.UnmarshalFromMap[I](rawMap) if err != nil { err = fmt.Errorf("failed to unmarshal inputs into the target type: %w", err) - if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1); patchErr != nil { + if patchErr := patchFailureStatus(ctx, k8sClient, execCtxData, err, 1, enums.ExitClassTerminal); patchErr != nil { return zero, fmt.Errorf("engram failed during input unmarshaling: %w (status patch also failed: %v)", err, patchErr) } return zero, err @@ -937,19 +382,6 @@ func hydrateAndUnmarshalInputs[C any, I any]( //nolint:gocyclo return inputs, nil } -func skipInputTemplating() bool { - value := strings.TrimSpace(os.Getenv(contracts.SkipInputTemplatingEnv)) - if value == "" { - return false - } - switch strings.ToLower(value) { - case "1", "true", "yes", "y", "on": - return true - default: - return false - } -} - // combineErrors combines a processing error and a dehydration/marshal error into a single error. func combineErrors(processErr, storageErr error) error { if processErr != nil && storageErr != nil { @@ -964,7 +396,7 @@ func combineErrors(processErr, storageErr error) error { // bridgeEnabled returns whether the SDK should forward batch outputs to the Hub. // Defaults to true to enable hybrid by default; set BUBU_HYBRID_BRIDGE=false to disable. func bridgeEnabled() bool { - if v := os.Getenv(contracts.HybridBridgeEnv); v != "" { + if v := os.Getenv("BUBU_HYBRID_BRIDGE"); v != "" { // Accept common truthy/falsey values switch strings.ToLower(v) { case "0", "false", "no", "off": @@ -979,157 +411,68 @@ func bridgeEnabled() bool { // getBridgeTimeout returns the max duration to spend on the hub bridge (best-effort). // Prevents extending Job lifetime indefinitely in case of hub outages. func getBridgeTimeout() time.Duration { - return env.GetDuration(contracts.HybridBridgeTimeoutEnv, 15*time.Second) -} - -// bridgeToHub forwards hybrid outputs through the transport connector advertised in the binding. -func bridgeToHub(ctx context.Context, payloadJSON []byte, execCtxData *runtime.ExecutionContextData) error { - // If payload is empty or nil, nothing to forward. - if len(payloadJSON) == 0 { - return nil - } - - ctx, cancel := ensureBridgeTimeout(ctx) - defer cancel() - - logger := LoggerFromContext(ctx) - - ref, err := bindingReferenceFromEnv() - if errors.Is(err, errBindingEnvMissing) { - logger.Debug("No transport binding provided; skipping hybrid bridge delivery") - return nil - } - if err != nil { - return err - } - - endpoint := strings.TrimSpace(ref.endpoint()) - if endpoint == "" { - return fmt.Errorf("transport binding missing endpoint for hybrid delivery") - } - - env := newEnvResolver(ref.envOverrides()) //nolint:revive - conn, err := connectorDial(ctx, endpoint, env) - if err != nil { - return fmt.Errorf("failed to dial transport connector %s: %w", endpoint, err) - } - if isDebugEnabled() { - logger.Debug("Delivering hybrid payload via connector", - slog.String("endpoint", endpoint), - slog.String("driver", normalizedDriver(ref)), - slog.Int("payloadBytes", len(payloadJSON)), - debugBytesAttr("payloadPreview", payloadJSON), - ) - } - defer func() { - if closeErr := conn.Close(); closeErr != nil { - logger.Warn("Failed to close transport connector", "error", closeErr) + if v := os.Getenv("BUBU_HYBRID_BRIDGE_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d } - }() - - stream, err := conn.Client().Data(ctx) - if err != nil { - return fmt.Errorf("failed to open data stream: %w", err) } + return 15 * time.Second +} - msg, err := buildHybridStreamMessage(payloadJSON, execCtxData) - if err != nil { - return err - } - req, err := streamMessageToPublishRequest(msg) - if err != nil { - return err - } - dataReq := publishRequestToDataRequest(req) - if err := stream.Send(dataReq); err != nil { - return fmt.Errorf("failed to send hybrid payload: %w", err) +// getHubTarget resolves the Hub address. If DOWNSTREAM_HOST is set, use it. +// Otherwise, use the in-cluster DNS of the hub service in the current namespace. +func getHubTarget() string { + if v := os.Getenv("DOWNSTREAM_HOST"); v != "" { + return v } - if err := stream.CloseSend(); err != nil { - return fmt.Errorf("failed to close data stream: %w", err) + // Fallback to hub service DNS + ns := k8s.ResolvePodNamespace() + port := os.Getenv("BUBU_GRPC_PORT") + if port == "" { + port = DefaultGRPCPort } - if isDebugEnabled() { - logger.Debug("Hybrid payload delivered", "endpoint", endpoint) - } - return nil + return fmt.Sprintf("bobravoz-grpc-hub.%s.svc.cluster.local:%s", ns, port) } -func logTypedInputs(logger *slog.Logger, inputs any) { - if !isDebugEnabled() || logger == nil { - return +// bridgeToHub sends a single DataPacket with the given JSON payload bytes to the Hub. +// It uses StreamToWithMetadata to automatically attach StoryRun metadata from env. +func bridgeToHub(ctx context.Context, payloadJSON []byte) error { + // If payload is empty or nil, nothing to forward. + if len(payloadJSON) == 0 { + return nil } - logger.Debug("Typed inputs prepared for engram", debugPreviewAttr("typedInputsPreview", inputs)) -} -func logProcessResult(logger *slog.Logger, result *engram.Result, processErr error) { - if !isDebugEnabled() || logger == nil { - return - } - var data any - if result != nil { - data = result.Data + // If no deadline is set on the provided context, enforce a best-effort timeout + // based on BUBU_HYBRID_BRIDGE_TIMEOUT to avoid hanging tests or jobs. + if _, hasDeadline := ctx.Deadline(); !hasDeadline { + to := getBridgeTimeout() + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, to) + defer cancel() } - logger.Debug("Engram Process completed", - slog.Bool("success", processErr == nil), - debugPreviewAttr("resultPreview", data), - ) -} -func ensureBridgeTimeout(ctx context.Context) (context.Context, context.CancelFunc) { - if _, hasDeadline := ctx.Deadline(); hasDeadline { - return ctx, func() {} - } - timeout := getBridgeTimeout() - newCtx, cancel := context.WithTimeout(ctx, timeout) - return newCtx, cancel -} + target := getHubTarget() -func buildHybridStreamMessage( - payloadJSON []byte, - execCtxData *runtime.ExecutionContextData, -) (engram.StreamMessage, error) { - options := []StreamMessageOption{ - WithTimestamp(time.Now().UTC()), - } - if len(payloadJSON) > 0 { - options = append(options, WithJSONPayload(payloadJSON)) - } - if execCtxData != nil { - options = append(options, WithMetadata(storyMetadata(execCtxData.StoryInfo))) - inputsPayload, err := inputsJSON(execCtxData.Inputs) - if err != nil { - return engram.StreamMessage{}, fmt.Errorf("failed to marshal inputs payload: %w", err) + // Prepare single-message channels + in := make(chan engram.StreamMessage, 1) + out := make(chan engram.StreamMessage, 1) + + // Drain any responses so the client writer is never blocked + drainDone := make(chan struct{}) + go func() { + defer close(drainDone) + for range out { + // discard } - options = append(options, WithInputs(inputsPayload)) - options = append(options, WithTransports(cloneTransportDescriptors(execCtxData.Transports))) - } - return NewStreamMessage("data", options...), nil -} + }() -// setCondition is a helper to add or update a condition in a StepRunStatus. -func setCondition( - status *runsv1alpha1.StepRunStatus, - condType string, - condStatus metav1.ConditionStatus, - reason string, - message string, -) { - newCond := metav1.Condition{ - Type: condType, - Status: condStatus, - Reason: reason, - Message: message, - LastTransitionTime: metav1.Now(), - } + // Enqueue one message and close input + in <- engram.StreamMessage{Metadata: map[string]string{}, Payload: payloadJSON, Inputs: nil} + close(in) - for i, c := range status.Conditions { - if c.Type == condType { - // Update existing condition - if c.Status != newCond.Status || c.Reason != newCond.Reason || c.Message != newCond.Message { - status.Conditions[i] = newCond - } - return - } - } - // Add new condition - status.Conditions = append(status.Conditions, newCond) + err := StreamToWithMetadata(ctx, target, in, out) + // Ensure drain goroutine exits even if stream never closed 'out' + close(out) + <-drainDone + return err } diff --git a/batch_test.go b/batch_test.go index 127f6af..1774aa7 100644 --- a/batch_test.go +++ b/batch_test.go @@ -1,43 +1,15 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package sdk import ( - "context" - "encoding/json" - "fmt" "os" - "strings" "testing" "time" - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/core/contracts" - coretransport "github.com/bubustack/core/runtime/transport" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "google.golang.org/grpc" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/bubu-sdk-go/runtime" ) func Test_bridgeEnabled_DefaultTrue(t *testing.T) { - err := os.Unsetenv(contracts.HybridBridgeEnv) + err := os.Unsetenv("BUBU_HYBRID_BRIDGE") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } @@ -45,12 +17,12 @@ func Test_bridgeEnabled_DefaultTrue(t *testing.T) { } func Test_bridgeEnabled_Disabled(t *testing.T) { - err := os.Setenv(contracts.HybridBridgeEnv, "false") + err := os.Setenv("BUBU_HYBRID_BRIDGE", "false") if err != nil { t.Fatalf("Setenv() error = %v", err) } defer func() { - err = os.Unsetenv(contracts.HybridBridgeEnv) + err = os.Unsetenv("BUBU_HYBRID_BRIDGE") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } @@ -59,7 +31,7 @@ func Test_bridgeEnabled_Disabled(t *testing.T) { } func Test_getBridgeTimeout_Default(t *testing.T) { - err := os.Unsetenv(contracts.HybridBridgeTimeoutEnv) + err := os.Unsetenv("BUBU_HYBRID_BRIDGE_TIMEOUT") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } @@ -68,12 +40,12 @@ func Test_getBridgeTimeout_Default(t *testing.T) { } func Test_getBridgeTimeout_Override(t *testing.T) { - err := os.Setenv(contracts.HybridBridgeTimeoutEnv, "123ms") + err := os.Setenv("BUBU_HYBRID_BRIDGE_TIMEOUT", "123ms") if err != nil { t.Fatalf("Setenv() error = %v", err) } defer func() { - err = os.Unsetenv(contracts.HybridBridgeTimeoutEnv) + err = os.Unsetenv("BUBU_HYBRID_BRIDGE_TIMEOUT") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } @@ -81,141 +53,16 @@ func Test_getBridgeTimeout_Override(t *testing.T) { assert.Equal(t, 123*time.Millisecond, getBridgeTimeout()) } -func Test_bridgeToHubSkipsWithoutBinding(t *testing.T) { - t.Setenv(contracts.TransportBindingEnv, "") - err := bridgeToHub(context.Background(), []byte(`{"ok":true}`), &runtime.ExecutionContextData{}) - require.NoError(t, err) -} - -func Test_bridgeToHubDialFailure(t *testing.T) { - t.Setenv(contracts.TransportBindingEnv, `{"binding":{"driver":"demo","endpoint":"dial-fail","protocolVersion":"`+coretransport.ProtocolVersion+`"}}`) //nolint:lll - original := connectorDial - connectorDial = func( - ctx context.Context, - endpoint string, - env envResolver, - opts ...grpc.DialOption, - ) (*TransportConnectorClient, error) { - return nil, fmt.Errorf("dial error") - } - defer func() { connectorDial = original }() - - err := bridgeToHub(context.Background(), []byte(`{"ok":true}`), &runtime.ExecutionContextData{}) - require.Error(t, err) -} - -type noopStorageManager struct{} - -func (noopStorageManager) Hydrate(ctx context.Context, data any) (any, error) { - return nil, nil -} - -func (noopStorageManager) Dehydrate(ctx context.Context, data any, stepRunID string) (any, error) { - return data, nil -} - -type noopK8sClient struct{} - -func (noopK8sClient) TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - return nil, nil -} - -func (noopK8sClient) PatchStepRunStatus( - ctx context.Context, - stepRunName string, - patchData runsv1alpha1.StepRunStatus, -) error { - return nil -} - -func TestHydrateAndUnmarshalInputs_AllowsNil(t *testing.T) { - ctx := context.Background() - execCtx := &runtime.ExecutionContextData{ - Inputs: make(map[string]any), - StoryInfo: engram.StoryInfo{ - StepRunID: "step-run", - }, - } - _, err := hydrateAndUnmarshalInputs[struct{}, struct{}](ctx, noopStorageManager{}, noopK8sClient{}, execCtx) - require.NoError(t, err) -} - -func TestBuildHybridStreamMessage(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - Inputs: map[string]any{"foo": "bar"}, - StoryInfo: engram.StoryInfo{ - StoryName: "demo", - StepRunID: "step-123", - StoryRunID: "run-9", - }, - Transports: []engram.TransportDescriptor{ - {Name: "default", Kind: "live"}, - }, - } - msg, err := buildHybridStreamMessage([]byte(`{"ok":true}`), execCtx) - require.NoError(t, err) - require.Equal(t, "data", msg.Kind) - require.False(t, msg.Timestamp.IsZero()) - require.Equal(t, map[string]string{ - "storyName": "demo", - "stepRunID": "step-123", - "storyRunID": "run-9", - }, msg.Metadata) - var decoded map[string]any - require.NoError(t, json.Unmarshal(msg.Inputs, &decoded)) - require.Equal(t, map[string]any{"foo": "bar"}, decoded) - require.Len(t, msg.Transports, 1) - execCtx.Transports[0].Name = "mutated" - require.Equal(t, "default", msg.Transports[0].Name, "transports should be cloned") -} - -func TestBuildHybridStreamMessageNilCtx(t *testing.T) { - msg, err := buildHybridStreamMessage([]byte("x"), nil) - require.NoError(t, err) - require.Equal(t, "data", msg.Kind) - require.False(t, msg.Timestamp.IsZero()) - require.Equal(t, []byte("x"), msg.Payload) - require.Nil(t, msg.Metadata) - require.Nil(t, msg.Transports) - require.Nil(t, msg.Inputs) -} - -func TestBuildHybridStreamMessageInputMarshalError(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - Inputs: map[string]any{"bad": make(chan int)}, - } - _, err := buildHybridStreamMessage([]byte("x"), execCtx) - require.Error(t, err) -} - -func TestTruncateErrorMessage(t *testing.T) { - long := strings.Repeat("x", 10000) - got := truncateErrorMessage(long, 8192) - if len(got) > 8192 { - t.Errorf("truncateErrorMessage should cap at 8192 bytes, got %d", len(got)) - } - if len(got) != 8192 { - t.Errorf("truncateErrorMessage should use exactly 8192 bytes for input longer than limit, got %d", len(got)) - } -} - -func TestTruncateErrorMessage_ShortPassthrough(t *testing.T) { - msg := "short error" - got := truncateErrorMessage(msg, 8192) - if got != msg { - t.Errorf("truncateErrorMessage should return msg unchanged, got %q", got) - } -} - -func TestTruncateErrorMessage_ZeroLimit(t *testing.T) { - msg := "some error" - got := truncateErrorMessage(msg, 0) - if got != msg { - t.Errorf("zero limit should return msg unchanged, got %q", got) +func Test_getHubTarget_EnvOverride(t *testing.T) { + err := os.Setenv("DOWNSTREAM_HOST", "example:9000") + if err != nil { + t.Fatalf("Setenv() error = %v", err) } + defer func() { + err = os.Unsetenv("DOWNSTREAM_HOST") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + }() + assert.Equal(t, "example:9000", getHubTarget()) } diff --git a/cel/cel.go b/cel/cel.go deleted file mode 100644 index 609a482..0000000 --- a/cel/cel.go +++ /dev/null @@ -1,185 +0,0 @@ -package cel - -import ( - "context" - "encoding/json" - "fmt" - "log/slog" - "strings" - "time" - - "github.com/bubustack/core/templating" -) - -// Expression holds a CEL expression and its variable bindings. -type Expression struct { - // Expr is the CEL source expression to evaluate. - Expr string - // Vars contains the variable bindings available to Expr at evaluation time. - Vars map[string]any -} - -// ExtractExpression extracts a template expression wrapper from a map. -func ExtractExpression(value any) (Expression, bool) { - m, ok := value.(map[string]any) - if !ok { - return Expression{}, false - } - exprRaw, ok := m[templating.TemplateExprKey] - if !ok { - return Expression{}, false - } - expr, ok := exprRaw.(string) - if !ok || strings.TrimSpace(expr) == "" { - return Expression{}, false - } - vars, _ := m[templating.TemplateVarsKey].(map[string]any) - return Expression{Expr: expr, Vars: vars}, true -} - -// Config aliases the core templating config for SDK evaluators. -type Config = templating.Config - -// Evaluator wraps core templating to evaluate expressions inside engrams. -type Evaluator struct { - inner *templating.Evaluator - logger *sdkCELLogger -} - -// NewEvaluator creates a template evaluator using core templating. -func NewEvaluator(logger *slog.Logger, cfg Config) (*Evaluator, error) { - inner, err := templating.New(cfg) - if err != nil { - return nil, err - } - return &Evaluator{inner: inner, logger: newSDKCELLogger(logger)}, nil -} - -// Close releases evaluator resources. -func (e *Evaluator) Close() { - if e != nil && e.inner != nil { - e.inner.Close() - } -} - -// EvaluateExpression evaluates a raw template expression with the provided variables. -func (e *Evaluator) EvaluateExpression(ctx context.Context, expr string, vars map[string]any) (any, error) { - if e == nil || e.inner == nil { - return nil, fmt.Errorf("template evaluator is nil") - } - trimmed := strings.TrimSpace(expr) - if trimmed == "" { - return "", nil - } - if !strings.Contains(trimmed, "{{") { - trimmed = "{{ " + trimmed + " }}" - } - start := time.Now() - e.logStart(trimmed, "expression") - resolved, err := e.inner.ResolveWithInputs(ctx, map[string]any{"value": trimmed}, vars) - if err != nil { - e.logError(err, trimmed, "expression", start) - return nil, err - } - result := coerceTemplateScalar(resolved["value"]) - e.logSuccess(trimmed, "expression", start, result) - return result, nil -} - -// EvaluateCondition evaluates a boolean template expression. -// Returns true if the expression resolves to a truthy value. -// An empty expression returns true (no filter = pass all). -func (e *Evaluator) EvaluateCondition(ctx context.Context, expr string, vars map[string]any) (bool, error) { - if e == nil || e.inner == nil { - return false, fmt.Errorf("template evaluator is nil") - } - start := time.Now() - e.logStart(expr, "condition") - result, err := e.inner.EvaluateCondition(ctx, expr, vars) - if err != nil { - e.logError(err, expr, "condition", start) - return false, err - } - e.logSuccess(expr, "condition", start, result) - return result, nil -} - -// ResolveTemplate resolves a string template or a map containing templates. -func (e *Evaluator) ResolveTemplate(ctx context.Context, template any, vars map[string]any) (any, error) { - if e == nil || e.inner == nil { - return nil, fmt.Errorf("template evaluator is nil") - } - expr := describeTemplateInput(template) - start := time.Now() - e.logStart(expr, "template") - switch typed := template.(type) { - case map[string]any: - resolved, err := e.inner.ResolveWithInputs(ctx, typed, vars) - if err != nil { - e.logError(err, expr, "template", start) - return nil, err - } - e.logSuccess(expr, "template", start, resolved) - return resolved, nil - case string: - resolved, err := e.inner.ResolveWithInputs(ctx, map[string]any{"value": typed}, vars) - if err != nil { - e.logError(err, expr, "template", start) - return nil, err - } - result := resolved["value"] - e.logSuccess(expr, "template", start, result) - return result, nil - default: - e.logSuccess(expr, "template", start, template) - return template, nil - } -} - -func (e *Evaluator) logStart(expression, expressionType string) { - if e == nil || e.logger == nil { - return - } - e.logger.EvaluationStart(expression, expressionType) -} - -func (e *Evaluator) logSuccess(expression, expressionType string, started time.Time, result any) { - if e == nil || e.logger == nil { - return - } - e.logger.EvaluationSuccess(expression, expressionType, time.Since(started), result) -} - -func (e *Evaluator) logError(err error, expression, expressionType string, started time.Time) { - if e == nil || e.logger == nil || err == nil { - return - } - e.logger.EvaluationError(err, expression, expressionType, time.Since(started)) -} - -func describeTemplateInput(template any) string { - switch typed := template.(type) { - case string: - return typed - case map[string]any: - return "" - default: - return fmt.Sprintf("<%T>", template) - } -} - -func coerceTemplateScalar(value any) any { - raw, ok := value.(string) - if !ok { - return value - } - trimmed := strings.TrimSpace(raw) - if trimmed == "" { - return raw - } - var decoded any - if err := json.Unmarshal([]byte(trimmed), &decoded); err == nil { - return decoded - } - return value -} diff --git a/cel/cel_test.go b/cel/cel_test.go deleted file mode 100644 index 141222a..0000000 --- a/cel/cel_test.go +++ /dev/null @@ -1,108 +0,0 @@ -package cel - -import ( - "bytes" - "context" - "log/slog" - "strings" - "testing" - - "github.com/bubustack/core/templating" -) - -func TestEvaluateExpression(t *testing.T) { - eval, err := NewEvaluator(nil, Config{}) - if err != nil { - t.Fatalf("NewEvaluator error: %v", err) - } - t.Cleanup(eval.Close) - - vars := map[string]any{"inputs": map[string]any{"value": 2}} - out, err := eval.EvaluateExpression(context.Background(), "add inputs.value 3", vars) - if err != nil { - t.Fatalf("EvaluateExpression error: %v", err) - } - switch v := out.(type) { - case int: - if v != 5 { - t.Fatalf("expected 5, got %#v", out) - } - case int64: - if v != 5 { - t.Fatalf("expected 5, got %#v", out) - } - case float64: - if v != 5 { - t.Fatalf("expected 5, got %#v", out) - } - default: - t.Fatalf("expected numeric 5, got %T (%#v)", out, out) - } -} - -func TestExtractExpression(t *testing.T) { - payload := map[string]any{ - templating.TemplateExprKey: "inputs.value", - templating.TemplateVarsKey: map[string]any{"inputs": map[string]any{"value": 1}}, - } - expr, ok := ExtractExpression(payload) - if !ok { - t.Fatalf("expected expression to be extracted") - } - if expr.Expr != "inputs.value" { - t.Fatalf("unexpected expr: %s", expr.Expr) - } - if expr.Vars == nil { - t.Fatalf("expected vars") - } -} - -func TestNewEvaluatorUsesLoggerForSuccessfulExpression(t *testing.T) { - var buf bytes.Buffer - logger := slog.New(slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - - eval, err := NewEvaluator(logger, Config{}) - if err != nil { - t.Fatalf("NewEvaluator error: %v", err) - } - t.Cleanup(eval.Close) - - _, err = eval.EvaluateExpression(context.Background(), "add 1 2", nil) - if err != nil { - t.Fatalf("EvaluateExpression error: %v", err) - } - - logs := buf.String() - if !strings.Contains(logs, "Template evaluation started") { - t.Fatalf("expected start log, got %q", logs) - } - if !strings.Contains(logs, "Template evaluation succeeded") { - t.Fatalf("expected success log, got %q", logs) - } -} - -func TestNewEvaluatorUsesLoggerForConditionErrors(t *testing.T) { - var buf bytes.Buffer - logger := slog.New(slog.NewTextHandler(&buf, nil)) - - eval, err := NewEvaluator(logger, Config{}) - if err != nil { - t.Fatalf("NewEvaluator error: %v", err) - } - t.Cleanup(eval.Close) - - _, err = eval.EvaluateCondition(context.Background(), "inputs.value", map[string]any{ - "inputs": map[string]any{"value": "not-a-bool"}, - }) - if err == nil { - t.Fatal("expected condition evaluation error") - } - - logs := buf.String() - if !strings.Contains(logs, "Template evaluation failed") { - t.Fatalf("expected failure log, got %q", logs) - } - if !strings.Contains(logs, "condition") { - t.Fatalf("expected condition log context, got %q", logs) - } -} diff --git a/cel/logger.go b/cel/logger.go deleted file mode 100644 index 61f231c..0000000 --- a/cel/logger.go +++ /dev/null @@ -1,46 +0,0 @@ -package cel - -import ( - "log/slog" - "time" - - "github.com/bubustack/bobrapet/pkg/observability" -) - -type sdkCELLogger struct { - logger *slog.Logger -} - -var _ observability.Logger = (*sdkCELLogger)(nil) - -func newSDKCELLogger(logger *slog.Logger) *sdkCELLogger { - return &sdkCELLogger{logger: logger} -} - -func (l *sdkCELLogger) CacheHit(expression, expressionType string) { - if l == nil || l.logger == nil { - return - } - l.logger.Debug("Template cache hit", "expression", expression, "type", expressionType) -} - -func (l *sdkCELLogger) EvaluationStart(expression, expressionType string) { - if l == nil || l.logger == nil { - return - } - l.logger.Debug("Template evaluation started", "expression", expression, "type", expressionType) -} - -func (l *sdkCELLogger) EvaluationError(err error, expression, expressionType string, duration time.Duration) { - if l == nil || l.logger == nil { - return - } - l.logger.Error("Template evaluation failed", "expression", expression, "type", expressionType, "duration", duration.String(), "error", err) //nolint:lll -} - -func (l *sdkCELLogger) EvaluationSuccess(expression, expressionType string, duration time.Duration, result any) { - if l == nil || l.logger == nil { - return - } - l.logger.Debug("Template evaluation succeeded", "expression", expression, "type", expressionType, "duration", duration.String()) //nolint:lll -} diff --git a/cel_templates.go b/cel_templates.go deleted file mode 100644 index 221dea1..0000000 --- a/cel_templates.go +++ /dev/null @@ -1,181 +0,0 @@ -package sdk - -import ( - "context" - "encoding/json" - "fmt" - "log/slog" - "maps" - "time" - - sdkenv "github.com/bubustack/bubu-sdk-go/pkg/env" - "github.com/bubustack/core/templating" -) - -const ( - defaultTemplateMaxOutputBytes = 1 * 1024 * 1024 - defaultTemplateMaxInputBytes = 8 * 1024 * 1024 - defaultTemplateEvalTimeout = 30 * time.Second - templateMaxOutputBytesEnv = "BUBU_TEMPLATE_MAX_OUTPUT_BYTES" - templateMaxInputBytesEnv = "BUBU_TEMPLATE_MAX_INPUT_BYTES" - templateEvalTimeoutEnv = "BUBU_TEMPLATE_EVALUATION_TIMEOUT" -) - -func resolveCELTemplates( - ctx context.Context, - logger *slog.Logger, - sm StorageManager, - vars map[string]any, - payload any) (any, - error, -) { - return resolveTemplates(ctx, logger, sm, vars, payload) -} - -func resolveTemplates(ctx context.Context, logger *slog.Logger, sm StorageManager, vars map[string]any, payload any) (any, error) { //nolint:lll - if payload == nil { - return nil, nil - } - if logger == nil { - logger = slog.Default() - } - - start := time.Now() - logger.Debug("Resolving templates", - slog.Int("payloadFields", countTopLevelFields(payload)), - ) - maxInputBytes := resolveTemplateMaxInputBytes() - if err := validateTemplateInputSize("payload", payload, maxInputBytes); err != nil { - logger.Error("Template payload rejected by input size limit", "error", err) - return nil, err - } - - normalizedVars, err := hydrateCELContext(ctx, sm, vars) - if err != nil { - logger.Error("Failed to hydrate template context", "error", err) - return nil, err - } - if err := validateTemplateInputSize("context", normalizedVars, maxInputBytes); err != nil { - logger.Error("Template context rejected by input size limit", "error", err) - return nil, err - } - logger.Debug("Template context hydrated", - slog.Duration("hydrationDuration", time.Since(start)), - ) - - eval, err := templating.New(templating.Config{ - EvaluationTimeout: resolveTemplateEvaluationTimeout(), - MaxOutputBytes: resolveTemplateMaxOutputBytes(), - Deterministic: false, - }) - if err != nil { - return nil, fmt.Errorf("failed to initialize template evaluator: %w", err) - } - defer eval.Close() - - result, err := eval.ResolveValue(ctx, payload, normalizedVars) - if err != nil { - logger.Error("Template resolution failed", - "error", err, - slog.Duration("duration", time.Since(start)), - ) - return nil, err - } - logger.Debug("Template resolution completed", - slog.Duration("duration", time.Since(start)), - ) - return result, nil -} - -func normalizeCELVars(vars map[string]any) map[string]any { - normalized := map[string]any{ - "inputs": map[string]any{}, - "steps": map[string]any{}, - } - maps.Copy(normalized, vars) - if normalized["inputs"] == nil { - normalized["inputs"] = map[string]any{} - } - if normalized["steps"] == nil { - normalized["steps"] = map[string]any{} - } - return normalized -} - -func hydrateCELContext(ctx context.Context, sm StorageManager, vars map[string]any) (map[string]any, error) { - if vars == nil { - return normalizeCELVars(nil), nil - } - if sm == nil { - return normalizeCELVars(vars), nil - } - hydrated, err := sm.Hydrate(ctx, vars) - if err != nil { - return nil, fmt.Errorf("failed to hydrate CEL context: %w", err) - } - if hydrated == nil { - return normalizeCELVars(nil), nil - } - hydratedMap, ok := hydrated.(map[string]any) - if !ok { - return nil, fmt.Errorf("CEL context must be an object, got %T", hydrated) - } - return normalizeCELVars(hydratedMap), nil -} - -func resolveTemplateMaxOutputBytes() int { - return sdkenv.GetInt(templateMaxOutputBytesEnv, defaultTemplateMaxOutputBytes) -} - -func resolveTemplateMaxInputBytes() int { - return sdkenv.GetInt(templateMaxInputBytesEnv, defaultTemplateMaxInputBytes) -} - -func resolveTemplateEvaluationTimeout() time.Duration { - return sdkenv.GetDuration(templateEvalTimeoutEnv, defaultTemplateEvalTimeout) -} - -func validateTemplateInputSize(scope string, value any, maxBytes int) error { - if maxBytes <= 0 { - return nil - } - sizeBytes, err := estimateJSONSizeStrict(value) - if err != nil { - return fmt.Errorf("template %s size check failed: %w", scope, err) - } - if sizeBytes <= maxBytes { - return nil - } - return fmt.Errorf("template %s exceeds max input bytes (%d > %d)", scope, sizeBytes, maxBytes) -} - -func estimateJSONSizeStrict(v any) (int, error) { - raw, err := json.Marshal(v) - if err != nil { - return 0, err - } - return len(raw), nil -} - -// countTopLevelFields returns the number of top-level keys for maps, -// the length for slices, or 1 for scalar values. -func countTopLevelFields(v any) int { - switch typed := v.(type) { - case map[string]any: - return len(typed) - case []any: - return len(typed) - default: - return 1 - } -} - -// estimateJSONSize returns the approximate JSON-encoded size of a value. -// Returns 0 if marshaling fails. -func estimateJSONSize(v any) int { - raw, err := json.Marshal(v) - if err != nil { - return 0 - } - return len(raw) -} diff --git a/cel_templates_test.go b/cel_templates_test.go deleted file mode 100644 index cc36c2c..0000000 --- a/cel_templates_test.go +++ /dev/null @@ -1,131 +0,0 @@ -package sdk - -import ( - "context" - "log/slog" - "strings" - "testing" - "time" -) - -func TestResolveTemplatesEnforcesConfiguredOutputLimit(t *testing.T) { - t.Setenv(templateMaxOutputBytesEnv, "4") - - _, err := resolveTemplates( - context.Background(), - slog.Default(), - nil, - nil, - map[string]any{"value": `{{ "abcdef" }}`}, - ) - if err == nil { - t.Fatal("expected template output limit error") - } - if !strings.Contains(err.Error(), "template output exceeds max bytes") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestResolveTemplatesRejectsOversizedPayloadInput(t *testing.T) { - t.Setenv(templateMaxInputBytesEnv, "64") - - _, err := resolveTemplates( - context.Background(), - slog.Default(), - nil, - nil, - map[string]any{"value": strings.Repeat("x", 512)}, - ) - if err == nil { - t.Fatal("expected template payload input limit error") - } - if !strings.Contains(err.Error(), "template payload exceeds max input bytes") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestResolveTemplatesRejectsOversizedContextInput(t *testing.T) { - t.Setenv(templateMaxInputBytesEnv, "96") - - _, err := resolveTemplates( - context.Background(), - slog.Default(), - nil, - map[string]any{ - "inputs": map[string]any{ - "blob": strings.Repeat("y", 512), - }, - }, - map[string]any{"value": `{{ inputs.blob }}`}, - ) - if err == nil { - t.Fatal("expected template context input limit error") - } - if !strings.Contains(err.Error(), "template context exceeds max input bytes") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestResolveTemplateMaxInputBytesFromEnv(t *testing.T) { - t.Setenv(templateMaxInputBytesEnv, "2048") - - if got := resolveTemplateMaxInputBytes(); got != 2048 { - t.Fatalf("resolveTemplateMaxInputBytes() = %d, want 2048", got) - } -} - -func TestResolveTemplateMaxInputBytesFallsBackOnInvalidEnv(t *testing.T) { - t.Setenv(templateMaxInputBytesEnv, "invalid") - - if got := resolveTemplateMaxInputBytes(); got != defaultTemplateMaxInputBytes { - t.Fatalf("resolveTemplateMaxInputBytes() = %d, want %d", got, defaultTemplateMaxInputBytes) - } -} - -func TestResolveTemplateEvaluationTimeoutFromEnv(t *testing.T) { - t.Setenv(templateEvalTimeoutEnv, "7s") - - if got := resolveTemplateEvaluationTimeout(); got != 7*time.Second { - t.Fatalf("resolveTemplateEvaluationTimeout() = %v, want 7s", got) - } -} - -func TestResolveTemplateEvaluationTimeoutFallsBackOnInvalidEnv(t *testing.T) { - t.Setenv(templateEvalTimeoutEnv, "invalid") - - if got := resolveTemplateEvaluationTimeout(); got != defaultTemplateEvalTimeout { - t.Fatalf("resolveTemplateEvaluationTimeout() = %v, want %v", got, defaultTemplateEvalTimeout) - } -} - -func TestResolveTemplatesRejectsPayloadWhenInputSizeCannotBeMeasured(t *testing.T) { - _, err := resolveTemplates( - context.Background(), - slog.Default(), - nil, - nil, - map[string]any{"bad": make(chan int)}, - ) - if err == nil { - t.Fatal("expected payload size measurement error") - } - if !strings.Contains(err.Error(), "size check failed") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestResolveTemplatesRejectsContextWhenInputSizeCannotBeMeasured(t *testing.T) { - _, err := resolveTemplates( - context.Background(), - slog.Default(), - nil, - map[string]any{"bad": make(chan int)}, - map[string]any{"value": "ok"}, - ) - if err == nil { - t.Fatal("expected context size measurement error") - } - if !strings.Contains(err.Error(), "size check failed") { - t.Fatalf("unexpected error: %v", err) - } -} diff --git a/config_hydration.go b/config_hydration.go deleted file mode 100644 index f8bc69b..0000000 --- a/config_hydration.go +++ /dev/null @@ -1,360 +0,0 @@ -package sdk - -import ( - "context" - "fmt" - "log/slog" - "reflect" - "time" - - "github.com/bubustack/bobrapet/pkg/storage" - sdkenv "github.com/bubustack/bubu-sdk-go/pkg/env" -) - -const ( - defaultConfigHydrationMaxDepth = 64 - defaultConfigHydrationMaxNodes = 10000 - configHydrationMaxDepthEnv = "BUBU_CONFIG_HYDRATION_MAX_DEPTH" - configHydrationMaxNodesEnv = "BUBU_CONFIG_HYDRATION_MAX_NODES" -) - -type configPathSegment struct { - key string - index int - isIndex bool -} - -type configPath []configPathSegment - -type extractedConfigValue struct { - path configPath - value any -} - -type configTraversalKey struct { - kind reflect.Kind - ptr uintptr -} - -type configTraversalGuard struct { - maxDepth int - maxNodes int - nodes int - stack map[configTraversalKey]struct{} -} - -func hydrateConfig( - ctx context.Context, - sm StorageManager, - config map[string]any, - celContext map[string]any) (map[string]any, - error, -) { - if len(config) == 0 { - return config, nil - } - if sm == nil { - return nil, fmt.Errorf("storage manager is required to hydrate config") - } - - logger := LoggerFromContext(ctx) - start := time.Now() - logger.Debug("Hydrating config", - slog.Int("configFields", len(config)), - slog.Int("configBytes", estimateJSONSize(config)), - ) - - if err := validateConfigTraversalSafety(config); err != nil { - return nil, fmt.Errorf("config rejected before hydration: %w", err) - } - - // Record which config paths contain storage refs before hydration. - // After hydration these become raw user data (e.g. RSS bodies) that may - // contain literal "{{ ... }}" strings. We must skip template evaluation - // on these values to avoid treating user data as templates. - storageRefPaths := findStorageRefPaths(config) - - hydrated, err := sm.Hydrate(ctx, config) - if err != nil { - logger.Error("Config storage hydration failed", "error", err, slog.Duration("duration", time.Since(start))) - return nil, err - } - if hydrated == nil { - return nil, nil - } - hydratedMap, ok := hydrated.(map[string]any) - if !ok { - return nil, fmt.Errorf("hydrated config must be an object, got %T", hydrated) - } - if err := validateConfigTraversalSafety(hydratedMap); err != nil { - return nil, fmt.Errorf("hydrated config rejected before template resolution: %w", err) - } - logger.Debug("Config storage refs hydrated", - slog.Duration("duration", time.Since(start)), - slog.Int("hydratedFields", len(hydratedMap)), - slog.Int("hydratedBytes", estimateJSONSize(hydratedMap)), - ) - - // Extract storage-ref-hydrated values before template evaluation so - // raw user data is never parsed as Go templates. - extracted := extractPaths(hydratedMap, storageRefPaths) - if len(extracted) > 0 { - logger.Debug("Extracted storage-ref values from template evaluation", - slog.Int("count", len(extracted)), - ) - } - - resolved, err := resolveCELTemplates(ctx, logger, sm, celContext, hydratedMap) - if err != nil { - return nil, fmt.Errorf("failed to resolve config templates: %w", err) - } - if resolved == nil { - return map[string]any{}, nil - } - resolvedMap, ok := resolved.(map[string]any) - if !ok { - return nil, fmt.Errorf("resolved config must be an object, got %T", resolved) - } - - // Restore extracted storage-ref values into the resolved config. - restorePaths(resolvedMap, extracted) - - logger.Debug("Config hydration completed", - slog.Duration("totalDuration", time.Since(start)), - slog.Int("resolvedFields", len(resolvedMap)), - ) - return resolvedMap, nil -} - -func validateConfigTraversalSafety(config map[string]any) error { - guard := configTraversalGuard{ - maxDepth: sdkenv.GetInt(configHydrationMaxDepthEnv, defaultConfigHydrationMaxDepth), - maxNodes: sdkenv.GetInt(configHydrationMaxNodesEnv, defaultConfigHydrationMaxNodes), - stack: make(map[configTraversalKey]struct{}), - } - return guard.walk(config, 0) -} - -func (g *configTraversalGuard) walk(value any, depth int) error { - if depth > g.maxDepth { - return fmt.Errorf("config exceeds max depth %d", g.maxDepth) - } - g.nodes++ - if g.nodes > g.maxNodes { - return fmt.Errorf("config exceeds max nodes %d", g.maxNodes) - } - - switch typed := value.(type) { - case map[string]any: - key := configMapTraversalKey(typed) - if err := g.push(key); err != nil { - return err - } - defer g.pop(key) - for _, child := range typed { - if err := g.walk(child, depth+1); err != nil { - return err - } - } - case []any: - key := configSliceTraversalKey(typed) - if err := g.push(key); err != nil { - return err - } - defer g.pop(key) - for _, child := range typed { - if err := g.walk(child, depth+1); err != nil { - return err - } - } - } - - return nil -} - -func (g *configTraversalGuard) push(key configTraversalKey) error { - if key.ptr == 0 { - return nil - } - if _, exists := g.stack[key]; exists { - return fmt.Errorf("config contains cycle") - } - g.stack[key] = struct{}{} - return nil -} - -func (g *configTraversalGuard) pop(key configTraversalKey) { - if key.ptr == 0 { - return - } - delete(g.stack, key) -} - -func configMapTraversalKey(value map[string]any) configTraversalKey { - return configTraversalKey{ - kind: reflect.Map, - ptr: reflect.ValueOf(value).Pointer(), - } -} - -func configSliceTraversalKey(value []any) configTraversalKey { - if len(value) == 0 { - return configTraversalKey{} - } - return configTraversalKey{ - kind: reflect.Slice, - ptr: reflect.ValueOf(value).Pointer(), - } -} - -// findStorageRefPaths walks a config map and returns exact paths to values that -// are direct $bubuStorageRef objects. These exact values will become raw user -// data after hydration and must not be template-evaluated. -func findStorageRefPaths(config map[string]any) []configPath { - paths := make([]configPath, 0, len(config)) - for key, val := range config { - paths = append(paths, findStorageRefPathsInValue(val, configPath{{key: key}})...) - } - return paths -} - -func findStorageRefPathsInValue(v any, path configPath) []configPath { - switch val := v.(type) { - case map[string]any: - if _, ok := val[storage.StorageRefKey]; ok { - return []configPath{cloneConfigPath(path)} - } - var paths []configPath - for key, nested := range val { - paths = append(paths, findStorageRefPathsInValue(nested, appendConfigPathKey(path, key))...) - } - return paths - case []any: - var paths []configPath - for idx, item := range val { - paths = append(paths, findStorageRefPathsInValue(item, appendConfigPathIndex(path, idx))...) - } - return paths - } - return nil -} - -func cloneConfigPath(path configPath) configPath { - if len(path) == 0 { - return nil - } - cloned := make(configPath, len(path)) - copy(cloned, path) - return cloned -} - -func appendConfigPathKey(path configPath, key string) configPath { - cloned := make(configPath, len(path)+1) - copy(cloned, path) - cloned[len(path)] = configPathSegment{key: key} - return cloned -} - -func appendConfigPathIndex(path configPath, index int) configPath { - cloned := make(configPath, len(path)+1) - copy(cloned, path) - cloned[len(path)] = configPathSegment{index: index, isIndex: true} - return cloned -} - -// extractPaths replaces the given paths with inert placeholders and returns the -// original values for later restoration after template evaluation. -func extractPaths(m map[string]any, paths []configPath) []extractedConfigValue { - if len(paths) == 0 { - return nil - } - extracted := make([]extractedConfigValue, 0, len(paths)) - for idx, path := range paths { - if val, ok := replacePathWithPlaceholder(m, path, pathPlaceholder(idx)); ok { - extracted = append(extracted, extractedConfigValue{ - path: cloneConfigPath(path), - value: val, - }) - } - } - return extracted -} - -func pathPlaceholder(idx int) string { - return fmt.Sprintf("__bubu_storage_ref_skip_%d__", idx) -} - -func replacePathWithPlaceholder(m map[string]any, path configPath, placeholder any) (any, bool) { - var current any = m - for idx, segment := range path { - last := idx == len(path)-1 - if segment.isIndex { - items, ok := current.([]any) - if !ok || segment.index < 0 || segment.index >= len(items) { - return nil, false - } - if last { - value := items[segment.index] - items[segment.index] = placeholder - return value, true - } - current = items[segment.index] - continue - } - - obj, ok := current.(map[string]any) - if !ok { - return nil, false - } - value, ok := obj[segment.key] - if !ok { - return nil, false - } - if last { - obj[segment.key] = placeholder - return value, true - } - current = value - } - return nil, false -} - -// restorePaths puts extracted values back into the config map. -func restorePaths(m map[string]any, extracted []extractedConfigValue) { - for _, item := range extracted { - restorePath(m, item.path, item.value) - } -} - -func restorePath(m map[string]any, path configPath, value any) { - var current any = m - for idx, segment := range path { - last := idx == len(path)-1 - if segment.isIndex { - items, ok := current.([]any) - if !ok || segment.index < 0 || segment.index >= len(items) { - return - } - if last { - items[segment.index] = value - return - } - current = items[segment.index] - continue - } - - obj, ok := current.(map[string]any) - if !ok { - return - } - if last { - obj[segment.key] = value - return - } - next, ok := obj[segment.key] - if !ok { - return - } - current = next - } -} diff --git a/config_hydration_test.go b/config_hydration_test.go deleted file mode 100644 index 4dd125c..0000000 --- a/config_hydration_test.go +++ /dev/null @@ -1,370 +0,0 @@ -package sdk - -import ( - "context" - "fmt" - "strings" - "testing" - - "github.com/bubustack/bobrapet/pkg/storage" -) - -// mockHydratingStorageManager replaces $bubuStorageRef maps with the mapped -// replacement value, simulating what the real storage manager does. -type mockHydratingStorageManager struct { - // replacements maps storage ref paths to the hydrated content. - replacements map[string]any -} - -func (m mockHydratingStorageManager) Hydrate(_ context.Context, data any) (any, error) { - return m.hydrateValue(data), nil -} - -func (m mockHydratingStorageManager) hydrateValue(v any) any { - switch val := v.(type) { - case map[string]any: - if ref, ok := val[storage.StorageRefKey]; ok { - if refStr, ok := ref.(string); ok { - if replacement, ok := m.replacements[refStr]; ok { - return replacement - } - } - return val - } - result := make(map[string]any, len(val)) - for k, child := range val { - result[k] = m.hydrateValue(child) - } - return result - case []any: - result := make([]any, len(val)) - for i, item := range val { - result[i] = m.hydrateValue(item) - } - return result - default: - return v - } -} - -func (m mockHydratingStorageManager) Dehydrate(_ context.Context, data any, _ string) (any, error) { - return data, nil -} - -func TestHydrateConfig_SkipsTemplateEvalForStorageRefValues(t *testing.T) { - // RSS body containing literal {{ account_id }} — must not be template-evaluated. - rssBody := `Workers API: {{ account_id }} in path` - - sm := mockHydratingStorageManager{ - replacements: map[string]any{ - "outputs/step-1/body.json": rssBody, - }, - } - - config := map[string]any{ - "model": "gpt-4o-mini", - "userPrompt": map[string]any{ - storage.StorageRefKey: "outputs/step-1/body.json", - }, - "temperature": 0, - } - - celContext := map[string]any{} - - result, err := hydrateConfig(context.Background(), sm, config, celContext) - if err != nil { - t.Fatalf("hydrateConfig() returned unexpected error: %v", err) - } - - // userPrompt should contain the raw RSS body, not a template-evaluation error. - got, ok := result["userPrompt"] - if !ok { - t.Fatal("hydrateConfig() result missing 'userPrompt' key") - } - if got != rssBody { - t.Errorf("userPrompt = %q, want %q", got, rssBody) - } - - // Non-storage-ref values should still be present. - if result["model"] != "gpt-4o-mini" { - t.Errorf("model = %v, want 'gpt-4o-mini'", result["model"]) - } -} - -func TestHydrateConfig_TemplateEvalStillWorksForNonStorageRefValues(t *testing.T) { - sm := mockHydratingStorageManager{replacements: map[string]any{}} - - config := map[string]any{ - "greeting": "Hello {{ .inputs.name }}", - "static": "no templates here", - } - - celContext := map[string]any{ - "inputs": map[string]any{"name": "world"}, - } - - result, err := hydrateConfig(context.Background(), sm, config, celContext) - if err != nil { - t.Fatalf("hydrateConfig() returned unexpected error: %v", err) - } - - if result["greeting"] != "Hello world" { - t.Errorf("greeting = %q, want 'Hello world'", result["greeting"]) - } - if result["static"] != "no templates here" { - t.Errorf("static = %q, want 'no templates here'", result["static"]) - } -} - -func TestHydrateConfig_NestedStorageRefInArray(t *testing.T) { - sm := mockHydratingStorageManager{ - replacements: map[string]any{ - "outputs/step-1/data.json": "content with {{ braces }}", - }, - } - - config := map[string]any{ - "items": []any{ - map[string]any{ - storage.StorageRefKey: "outputs/step-1/data.json", - }, - }, - "label": "safe value", - } - - celContext := map[string]any{} - - result, err := hydrateConfig(context.Background(), sm, config, celContext) - if err != nil { - t.Fatalf("hydrateConfig() returned unexpected error: %v", err) - } - - items, ok := result["items"].([]any) - if !ok { - t.Fatalf("items is %T, want []any", result["items"]) - } - if len(items) != 1 || items[0] != "content with {{ braces }}" { - t.Errorf("items = %v, want [\"content with {{ braces }}\"]", items) - } -} - -func TestHydrateConfig_NestedStorageRefDoesNotSuppressSiblingTemplates(t *testing.T) { - rawBody := "raw body with {{ braces }}" - sm := mockHydratingStorageManager{ - replacements: map[string]any{ - "outputs/step-1/body.txt": rawBody, - }, - } - - config := map[string]any{ - "request": map[string]any{ - "body": map[string]any{ - storage.StorageRefKey: "outputs/step-1/body.txt", - }, - "summary": "Summarize {{ .inputs.topic }}", - "labels": []any{"{{ .inputs.label }}"}, - }, - } - - celContext := map[string]any{ - "inputs": map[string]any{ - "topic": "release notes", - "label": "urgent", - }, - } - - result, err := hydrateConfig(context.Background(), sm, config, celContext) - if err != nil { - t.Fatalf("hydrateConfig() returned unexpected error: %v", err) - } - - request, ok := result["request"].(map[string]any) - if !ok { - t.Fatalf("request is %T, want map[string]any", result["request"]) - } - if request["body"] != rawBody { - t.Fatalf("request.body = %q, want %q", request["body"], rawBody) - } - if request["summary"] != "Summarize release notes" { - t.Fatalf("request.summary = %q, want %q", request["summary"], "Summarize release notes") - } - labels, ok := request["labels"].([]any) - if !ok { - t.Fatalf("request.labels is %T, want []any", request["labels"]) - } - if len(labels) != 1 || labels[0] != "urgent" { - t.Fatalf("request.labels = %v, want [urgent]", labels) - } -} - -func TestFindStorageRefPaths(t *testing.T) { - config := map[string]any{ - "plain": "hello", - "ref": map[string]any{ - storage.StorageRefKey: "outputs/step-1/data.json", - }, - "nested": map[string]any{ - "inner": map[string]any{ - storage.StorageRefKey: "outputs/step-2/data.json", - }, - }, - "list": []any{ - map[string]any{ - storage.StorageRefKey: "outputs/step-3/data.json", - }, - }, - "number": 42, - } - - paths := findStorageRefPaths(config) - - pathSet := make(map[string]bool, len(paths)) - for _, p := range paths { - pathSet[configPathString(p)] = true - } - - if !pathSet["ref"] { - t.Error("expected 'ref' in storage ref paths") - } - if !pathSet["nested.inner"] { - t.Error("expected 'nested.inner' in storage ref paths") - } - if !pathSet["list[0]"] { - t.Error("expected 'list[0]' in storage ref paths") - } - if pathSet["plain"] { - t.Error("'plain' should not be in storage ref paths") - } - if pathSet["number"] { - t.Error("'number' should not be in storage ref paths") - } -} - -func TestExtractAndRestorePaths(t *testing.T) { - m := map[string]any{ - "a": map[string]any{ - "inner": "value-a", - }, - "b": []any{"value-b"}, - "c": "value-c", - } - - extracted := extractPaths(m, []configPath{ - {{key: "a"}, {key: "inner"}}, - {{key: "b"}, {index: 0, isIndex: true}}, - }) - - if len(extracted) != 2 { - t.Fatalf("expected 2 extracted values, got %d", len(extracted)) - } - if got := m["a"].(map[string]any)["inner"]; got == "value-a" { - t.Fatalf("expected nested value to be replaced with placeholder, got %v", got) - } - if got := m["b"].([]any)[0]; got == "value-b" { - t.Fatalf("expected array value to be replaced with placeholder, got %v", got) - } - - restorePaths(m, extracted) - - if len(m) != 3 { - t.Errorf("after restore, map has %d keys, want 3", len(m)) - } - if got := m["a"].(map[string]any)["inner"]; got != "value-a" { - t.Errorf("restored a.inner = %v, want value-a", got) - } - if got := m["b"].([]any)[0]; got != "value-b" { - t.Errorf("restored b[0] = %v, want value-b", got) - } - if m["c"] != "value-c" { - t.Errorf("restored map = %v", m) - } -} - -func TestHydrateConfigRejectsCyclicConfig(t *testing.T) { - config := map[string]any{} - config["self"] = config - - _, err := hydrateConfig(context.Background(), mockHydratingStorageManager{}, config, nil) - if err == nil { - t.Fatal("expected cyclic config to be rejected") - } - if got := err.Error(); got == "" || !containsAll(got, "config rejected before hydration", "cycle") { - t.Fatalf("expected cycle rejection error, got %v", err) - } -} - -func TestHydrateConfigRejectsExcessiveDepth(t *testing.T) { - t.Setenv(configHydrationMaxDepthEnv, "2") - - config := map[string]any{ - "level1": map[string]any{ - "level2": map[string]any{ - "level3": "too deep", - }, - }, - } - - _, err := hydrateConfig(context.Background(), mockHydratingStorageManager{}, config, nil) - if err == nil { - t.Fatal("expected deep config to be rejected") - } - if got := err.Error(); got == "" || !containsAll(got, "config rejected before hydration", "max depth 2") { - t.Fatalf("expected depth rejection error, got %v", err) - } -} - -func TestHydrateConfigRejectsHydratedConfigThatExceedsNodeBudget(t *testing.T) { - t.Setenv(configHydrationMaxNodesEnv, "3") - - sm := mockHydratingStorageManager{ - replacements: map[string]any{ - "outputs/step-1/payload.json": map[string]any{ - "a": map[string]any{ - "b": "c", - }, - }, - }, - } - - config := map[string]any{ - "payload": map[string]any{ - storage.StorageRefKey: "outputs/step-1/payload.json", - }, - } - - _, err := hydrateConfig(context.Background(), sm, config, nil) - if err == nil { - t.Fatal("expected hydrated config node budget rejection") - } - if got := err.Error(); got == "" || !containsAll(got, "hydrated config rejected before template resolution", - "max nodes 3") { - t.Fatalf("expected hydrated node budget rejection, got %v", err) - } -} - -func containsAll(s string, substrs ...string) bool { - for _, substr := range substrs { - if !strings.Contains(s, substr) { - return false - } - } - return true -} - -func configPathString(path configPath) string { - if len(path) == 0 { - return "" - } - var out strings.Builder - for i, segment := range path { - if segment.isIndex { - out.WriteString(fmt.Sprintf("[%d]", segment.index)) - continue - } - if i > 0 { - out.WriteString(".") - } - out.WriteString(segment.key) - } - return out.String() -} diff --git a/conformance/batch.go b/conformance/batch.go deleted file mode 100644 index d98623e..0000000 --- a/conformance/batch.go +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package conformance - -import ( - "context" - "errors" - "fmt" - "testing" - - sdk "github.com/bubustack/bubu-sdk-go" - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/bubu-sdk-go/testkit" -) - -// BatchSuite defines conformance checks for a BatchEngram. -type BatchSuite[C any, I any] struct { - Engram engram.BatchEngram[C, I] - Context context.Context - Config C - Inputs I - Secrets map[string]string - StoryInfo engram.StoryInfo - CELContext map[string]any - ExpectError bool - RequireStructuredError bool - ValidateError func(error) error - ValidateResult func(*engram.Result) error -} - -// Run executes Init + Process and enforces the configured contract checks. -func (s BatchSuite[C, I]) Run(t testing.TB) { - t.Helper() - ctx := s.Context - if ctx == nil { - ctx = context.Background() - } - h := testkit.BatchHarness[C, I]{ - Engram: s.Engram, - Config: s.Config, - Inputs: s.Inputs, - Secrets: s.Secrets, - StoryInfo: s.StoryInfo, - CELContext: s.CELContext, - } - result, err := h.Run(ctx) - if err := s.validateOutcome(result, err); err != nil { - t.Fatal(err) - } -} - -func (s BatchSuite[C, I]) validateOutcome(result *engram.Result, err error) error { - expectError := s.ExpectError || s.RequireStructuredError - if err != nil { - if !expectError { - return fmt.Errorf("engram run failed unexpectedly: %w", err) - } - if s.RequireStructuredError { - var provider sdk.StructuredErrorProvider - if !errors.As(err, &provider) { - return fmt.Errorf("expected structured error, got %v", err) - } - if err := testkit.ValidateStructuredError(provider.StructuredError()); err != nil { - return fmt.Errorf("invalid structured error: %w", err) - } - } - if s.ValidateError != nil { - if err := s.ValidateError(err); err != nil { - return fmt.Errorf("error validation failed: %w", err) - } - } - return nil - } - if expectError { - if s.RequireStructuredError { - return fmt.Errorf("expected structured error, but engram completed successfully") - } - return fmt.Errorf("expected error, but engram completed successfully") - } - if s.ValidateResult != nil { - if err := s.ValidateResult(result); err != nil { - return fmt.Errorf("result validation failed: %w", err) - } - } - return nil -} diff --git a/conformance/conformance_test.go b/conformance/conformance_test.go deleted file mode 100644 index f1b902b..0000000 --- a/conformance/conformance_test.go +++ /dev/null @@ -1,292 +0,0 @@ -package conformance - -import ( - "context" - "errors" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - sdk "github.com/bubustack/bubu-sdk-go" - "github.com/bubustack/bubu-sdk-go/engram" -) - -type contextKey string - -type testBatchEngram struct{} - -type testConfig struct{} - -type testInputs struct{} - -func (t *testBatchEngram) Init(ctx context.Context, cfg testConfig, secrets *engram.Secrets) error { - return nil -} - -func (t *testBatchEngram) Process(ctx context.Context, execCtx *engram.ExecutionContext, inputs testInputs) (*engram.Result, error) { //nolint:lll - return engram.NewResultFrom(map[string]any{"ok": true}), nil -} - -type ctxCheckingBatchEngram struct { - key contextKey - gotValue any -} - -func (e *ctxCheckingBatchEngram) Init(ctx context.Context, cfg testConfig, secrets *engram.Secrets) error { - e.gotValue = ctx.Value(e.key) - return nil -} - -func (e *ctxCheckingBatchEngram) Process(ctx context.Context, execCtx *engram.ExecutionContext, inputs testInputs) (*engram.Result, error) { //nolint:lll - e.gotValue = ctx.Value(e.key) - return engram.NewResultFrom(map[string]any{"ok": true}), nil -} - -func TestBatchSuiteRun(t *testing.T) { - suite := BatchSuite[testConfig, testInputs]{ - Engram: &testBatchEngram{}, - } - suite.Run(t) -} - -func TestBatchSuiteRun_UsesProvidedContext(t *testing.T) { - key := contextKey("tenant") - ctx := context.WithValue(context.Background(), key, "content-digest") - eng := &ctxCheckingBatchEngram{key: key} - - BatchSuite[testConfig, testInputs]{ - Engram: eng, - Context: ctx, - }.Run(t) - - if eng.gotValue != "content-digest" { - t.Fatalf("expected provided context value to propagate, got %v", eng.gotValue) - } -} - -type errorBatchEngram struct{} - -func (t *errorBatchEngram) Init(ctx context.Context, cfg testConfig, secrets *engram.Secrets) error { - return nil -} - -func (t *errorBatchEngram) Process(ctx context.Context, execCtx *engram.ExecutionContext, inputs testInputs) (*engram.Result, error) { //nolint:lll - return nil, errors.New("boom") -} - -type structuredErrorBatchEngram struct{} - -func (t *structuredErrorBatchEngram) Init(ctx context.Context, cfg testConfig, secrets *engram.Secrets) error { - return nil -} - -func (t *structuredErrorBatchEngram) Process(ctx context.Context, execCtx *engram.ExecutionContext, inputs testInputs) (*engram.Result, error) { //nolint:lll - return nil, sdk.NewStructuredError(runsv1alpha1.StructuredErrorTypeExecution, "boom") -} - -func TestBatchSuiteRun_FailsOnUnexpectedError(t *testing.T) { - err := BatchSuite[testConfig, testInputs]{ - Engram: &errorBatchEngram{}, - }.validateOutcome(nil, errors.New("boom")) - if err == nil { - t.Fatal("expected BatchSuite to fail on unexpected errors") - } -} - -func TestBatchSuiteRun_FailsWhenStructuredErrorRequiredButRunSucceeds(t *testing.T) { - err := BatchSuite[testConfig, testInputs]{ - Engram: &testBatchEngram{}, - RequireStructuredError: true, - }.validateOutcome(engram.NewResultFrom(map[string]any{"ok": true}), nil) - if err == nil { - t.Fatal("expected BatchSuite to fail when a structured error was required but execution succeeded") - } -} - -func TestBatchSuiteRun_AllowsStructuredErrorWhenRequired(t *testing.T) { - err := BatchSuite[testConfig, testInputs]{ - Engram: &structuredErrorBatchEngram{}, - RequireStructuredError: true, - }.validateOutcome(nil, sdk.NewStructuredError(runsv1alpha1.StructuredErrorTypeExecution, "boom")) - if err != nil { - t.Fatalf("expected BatchSuite to accept structured errors when required, got: %v", err) - } -} - -func TestBatchSuiteRun_AllowsExpectedPlainError(t *testing.T) { - err := BatchSuite[testConfig, testInputs]{ - Engram: &errorBatchEngram{}, - ExpectError: true, - }.validateOutcome(nil, errors.New("boom")) - if err != nil { - t.Fatalf("expected BatchSuite to accept expected plain errors, got: %v", err) - } -} - -func TestBatchSuiteRun_ValidatesExpectedError(t *testing.T) { - var validated bool - err := BatchSuite[testConfig, testInputs]{ - Engram: &errorBatchEngram{}, - ExpectError: true, - ValidateError: func(err error) error { - validated = true - if err == nil || err.Error() != "boom" { - t.Fatalf("unexpected error passed to ValidateError: %v", err) - } - return nil - }, - }.validateOutcome(nil, errors.New("boom")) - if err != nil { - t.Fatalf("expected BatchSuite to accept validated error, got: %v", err) - } - if !validated { - t.Fatal("expected ValidateError to run") - } -} - -type testStreamEngram struct{} - -type streamConfig struct{} - -func (t *testStreamEngram) Init(ctx context.Context, cfg streamConfig, secrets *engram.Secrets) error { - return nil -} - -func (t *testStreamEngram) Stream(ctx context.Context, in <-chan engram.InboundMessage, out chan<- engram.StreamMessage) error { //nolint:lll - for msg := range in { - out <- msg.StreamMessage - msg.Done() - } - return nil -} - -type ctxCheckingStreamEngram struct { - key contextKey - gotValue any -} - -func (e *ctxCheckingStreamEngram) Init(ctx context.Context, cfg streamConfig, secrets *engram.Secrets) error { - e.gotValue = ctx.Value(e.key) - return nil -} - -func (e *ctxCheckingStreamEngram) Stream(ctx context.Context, in <-chan engram.InboundMessage, out chan<- engram.StreamMessage) error { //nolint:lll - e.gotValue = ctx.Value(e.key) - for msg := range in { - msg.Done() - } - return nil -} - -func TestStreamSuiteRun(t *testing.T) { - suite := StreamSuite[streamConfig]{ - Engram: &testStreamEngram{}, - Inputs: []engram.StreamMessage{{Payload: []byte(`{"ok":true}`)}}, - RequireValidMessages: true, - } - suite.Run(t) -} - -func TestStreamSuiteRun_UsesProvidedContext(t *testing.T) { - key := contextKey("tenant") - ctx := context.WithValue(context.Background(), key, "content-digest") - eng := &ctxCheckingStreamEngram{key: key} - - StreamSuite[streamConfig]{ - Engram: eng, - Context: ctx, - Inputs: []engram.StreamMessage{{Payload: []byte(`{"ok":true}`)}}, - RequireValidMessages: true, - }.Run(t) - - if eng.gotValue != "content-digest" { - t.Fatalf("expected provided context value to propagate, got %v", eng.gotValue) - } -} - -func TestStreamSuiteValidateOutputContract_DefaultAllowsEmpty(t *testing.T) { - err := (StreamSuite[streamConfig]{}).validateOutputContract(nil) - if err != nil { - t.Fatalf("expected default stream output contract to allow empty output, got: %v", err) - } -} - -func TestStreamSuiteValidateOutputContract_RequireNonEmptyOutput(t *testing.T) { - err := (StreamSuite[streamConfig]{RequireNonEmptyOutput: true}).validateOutputContract(nil) - if err == nil { - t.Fatal("expected stream output contract to reject empty output when RequireNonEmptyOutput=true") - } -} - -func TestStreamSuiteValidateOutputContract_MinOutputCount(t *testing.T) { - outputs := []engram.StreamMessage{{Payload: []byte(`{"ok":true}`)}} - err := (StreamSuite[streamConfig]{MinOutputCount: 2}).validateOutputContract(outputs) - if err == nil { - t.Fatal("expected stream output contract to reject outputs below MinOutputCount") - } -} - -func TestStreamSuiteValidateOutputContract_MinOutputCountSatisfied(t *testing.T) { - outputs := []engram.StreamMessage{ - {Payload: []byte(`{"ok":true}`)}, - {Payload: []byte(`{"ok":true}`)}, - } - err := (StreamSuite[streamConfig]{RequireNonEmptyOutput: true, MinOutputCount: 2}).validateOutputContract(outputs) - if err != nil { - t.Fatalf("expected stream output contract to pass when requirements are met, got: %v", err) - } -} - -func TestStreamSuiteRun_RequiresAllInputsDone(t *testing.T) { - err := (StreamSuite[streamConfig]{ - Inputs: []engram.StreamMessage{{Payload: []byte(`{"ok":true}`)}}, - RequireAllInputsDone: true, - }).validateInputAcknowledgements(0) - if err == nil { - t.Fatal("expected input acknowledgement validation to fail when inputs are not acknowledged") - } -} - -func TestStreamSuiteValidateOutcome_FailsOnUnexpectedError(t *testing.T) { - err := (StreamSuite[streamConfig]{}).validateOutcome(nil, errors.New("boom"), 0) - if err == nil { - t.Fatal("expected StreamSuite to fail on unexpected errors") - } -} - -func TestStreamSuiteValidateOutcome_AllowsExpectedPlainError(t *testing.T) { - err := (StreamSuite[streamConfig]{ExpectError: true}).validateOutcome(nil, errors.New("boom"), 0) - if err != nil { - t.Fatalf("expected StreamSuite to accept expected plain errors, got: %v", err) - } -} - -func TestStreamSuiteValidateOutcome_AllowsStructuredErrorWhenRequired(t *testing.T) { - err := (StreamSuite[streamConfig]{RequireStructuredError: true}).validateOutcome( - nil, - sdk.NewStructuredError(runsv1alpha1.StructuredErrorTypeExecution, "boom"), - 0, - ) - if err != nil { - t.Fatalf("expected StreamSuite to accept structured errors when required, got: %v", err) - } -} - -func TestStreamSuiteValidateOutcome_ValidatesExpectedError(t *testing.T) { - var validated bool - err := (StreamSuite[streamConfig]{ - ExpectError: true, - ValidateError: func(err error) error { - validated = true - if err == nil || err.Error() != "boom" { - t.Fatalf("unexpected error passed to ValidateError: %v", err) - } - return nil - }, - }).validateOutcome(nil, errors.New("boom"), 0) - if err != nil { - t.Fatalf("expected StreamSuite to accept validated error, got: %v", err) - } - if !validated { - t.Fatal("expected ValidateError to run") - } -} diff --git a/conformance/stream.go b/conformance/stream.go deleted file mode 100644 index ca107db..0000000 --- a/conformance/stream.go +++ /dev/null @@ -1,157 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package conformance - -import ( - "context" - "errors" - "fmt" - "sync/atomic" - "testing" - - sdk "github.com/bubustack/bubu-sdk-go" - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/bubu-sdk-go/testkit" -) - -// StreamSuite defines conformance checks for a StreamingEngram. -type StreamSuite[C any] struct { - Engram engram.StreamingEngram[C] - Context context.Context - Config C - Inputs []engram.StreamMessage - Secrets map[string]string - StoryInfo engram.StoryInfo - CELContext map[string]any - RequireValidMessages bool - RequireNonEmptyOutput bool - MinOutputCount int - RequireAllInputsDone bool - ExpectError bool - RequireStructuredError bool - ValidateError func(error) error - ValidateOutputMessage func(engram.StreamMessage) error -} - -// Run executes Init + Stream and enforces the configured contract checks. -func (s StreamSuite[C]) Run(t testing.TB) { - t.Helper() - ctx := s.Context - if ctx == nil { - ctx = context.Background() - } - var inputDoneCount atomic.Int32 - h := testkit.StreamHarness[C]{ - Engram: s.Engram, - Config: s.Config, - Inputs: s.Inputs, - Secrets: s.Secrets, - StoryInfo: s.StoryInfo, - CELContext: s.CELContext, - OnInputProcessed: func(engram.StreamMessage) { - inputDoneCount.Add(1) - }, - } - outputs, err := h.Run(ctx) - if err := s.validateOutcome(outputs, err, int(inputDoneCount.Load())); err != nil { - t.Fatal(err) - } -} - -func (s StreamSuite[C]) validateOutputContract(outputs []engram.StreamMessage) error { - minOutputCount := s.MinOutputCount - if s.RequireNonEmptyOutput && minOutputCount < 1 { - minOutputCount = 1 - } - if minOutputCount > 0 && len(outputs) < minOutputCount { - return fmt.Errorf("expected at least %d stream output message(s), got %d", minOutputCount, len(outputs)) - } - return nil -} - -func (s StreamSuite[C]) validateInputAcknowledgements(doneCount int) error { - if !s.RequireAllInputsDone { - return nil - } - if doneCount != len(s.Inputs) { - return fmt.Errorf("expected %d input Done() call(s), got %d", len(s.Inputs), doneCount) - } - return nil -} - -func (s StreamSuite[C]) validateOutputMessages(outputs []engram.StreamMessage) error { - for _, msg := range outputs { - if s.RequireValidMessages { - if err := testkit.ValidateStreamMessage(msg); err != nil { - return fmt.Errorf("invalid stream message: %w", err) - } - } - if s.ValidateOutputMessage != nil { - if err := s.ValidateOutputMessage(msg); err != nil { - return fmt.Errorf("stream output validation failed: %w", err) - } - } - } - return nil -} - -//nolint:gocyclo,lll -func (s StreamSuite[C]) validateOutcome(outputs []engram.StreamMessage, err error, doneCount int) error { - expectError := s.ExpectError || s.RequireStructuredError - if err != nil { - if !expectError { - return fmt.Errorf("stream run failed unexpectedly: %w", err) - } - if s.RequireStructuredError { - var provider sdk.StructuredErrorProvider - if !errors.As(err, &provider) { - return fmt.Errorf("expected structured error, got %v", err) - } - if err := testkit.ValidateStructuredError(provider.StructuredError()); err != nil { - return fmt.Errorf("invalid structured error: %w", err) - } - } - if s.ValidateError != nil { - if err := s.ValidateError(err); err != nil { - return fmt.Errorf("error validation failed: %w", err) - } - } - if err := s.validateOutputContract(outputs); err != nil { - return fmt.Errorf("output validation failed (expected error path): %w", err) - } - if err := s.validateOutputMessages(outputs); err != nil { - return fmt.Errorf("output validation failed (expected error path): %w", err) - } - return nil - } - if expectError { - if s.RequireStructuredError { - return fmt.Errorf("expected structured error, but stream completed successfully") - } - return fmt.Errorf("expected error, but stream completed successfully") - } - if err := s.validateOutputContract(outputs); err != nil { - return fmt.Errorf("stream output contract failed: %w", err) - } - if err := s.validateInputAcknowledgements(doneCount); err != nil { - return fmt.Errorf("stream input completion contract failed: %w", err) - } - if err := s.validateOutputMessages(outputs); err != nil { - return err - } - return nil -} diff --git a/conformance/stream_error_test.go b/conformance/stream_error_test.go deleted file mode 100644 index b360e84..0000000 --- a/conformance/stream_error_test.go +++ /dev/null @@ -1,76 +0,0 @@ -package conformance - -import ( - "context" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - sdk "github.com/bubustack/bubu-sdk-go" - "github.com/bubustack/bubu-sdk-go/engram" -) - -type errorStreamEngram struct{} - -func (e *errorStreamEngram) Init(ctx context.Context, cfg streamConfig, secrets *engram.Secrets) error { - return nil -} - -func (e *errorStreamEngram) Stream(ctx context.Context, in <-chan engram.InboundMessage, out chan<- engram.StreamMessage) error { //nolint:lll - _ = in - msg, err := sdk.NewStreamErrorMessage(runsv1alpha1.StructuredError{ - Version: runsv1alpha1.StructuredErrorVersionV1, - Type: runsv1alpha1.StructuredErrorTypeExecution, - Message: "stream error", - }) - if err != nil { - return err - } - out <- msg - return nil -} - -func TestStreamSuiteErrorEnvelope(t *testing.T) { - suite := StreamSuite[streamConfig]{ - Engram: &errorStreamEngram{}, - Inputs: []engram.StreamMessage{{Payload: []byte(`{"ok":true}`)}}, - RequireValidMessages: true, - } - suite.Run(t) -} - -type invalidOutputOnErrorStreamEngram struct{} - -func (e *invalidOutputOnErrorStreamEngram) Init(ctx context.Context, cfg streamConfig, secrets *engram.Secrets) error { - return nil -} - -func (e *invalidOutputOnErrorStreamEngram) Stream( - ctx context.Context, - in <-chan engram.InboundMessage, - out chan<- engram.StreamMessage, -) error { - _ = ctx - _ = in - out <- engram.StreamMessage{} - return sdk.NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - "stream failed", - ) -} - -func TestStreamSuiteExpectedErrorStillValidatesOutputs(t *testing.T) { - suite := StreamSuite[streamConfig]{ - Engram: &invalidOutputOnErrorStreamEngram{}, - Inputs: []engram.StreamMessage{{Payload: []byte(`{"ok":true}`)}}, - RequireValidMessages: true, - ExpectError: true, - } - - err := suite.validateOutcome([]engram.StreamMessage{{}}, sdk.NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - "stream failed", - ), 0) - if err == nil { - t.Fatal("expected suite validation to fail for invalid output on error path") - } -} diff --git a/debug.go b/debug.go deleted file mode 100644 index 9b667a1..0000000 --- a/debug.go +++ /dev/null @@ -1,135 +0,0 @@ -package sdk - -import ( - "encoding/json" - "fmt" - "log/slog" - "os" - "sort" - "strings" - - "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/bubustack/core/contracts" -) - -const debugPreviewLimit = 2048 - -var truthyDebugValues = map[string]struct{}{ - "1": {}, - "true": {}, - "t": {}, - "yes": {}, - "y": {}, - "on": {}, - "debug": {}, -} - -func isDebugEnabled() bool { - raw := strings.TrimSpace(os.Getenv(contracts.DebugEnv)) - if raw == "" { - return false - } - _, ok := truthyDebugValues[strings.ToLower(raw)] - return ok -} - -// DebugModeEnabled reports whether verbose logging should be forced regardless of logger level. -func DebugModeEnabled() bool { - return isDebugEnabled() -} - -func newDefaultLogger() *slog.Logger { - opts := &slog.HandlerOptions{} - if isDebugEnabled() { - opts.Level = slog.LevelDebug - } - return slog.New(slog.NewJSONHandler(os.Stdout, opts)) -} - -func logExecutionContextDebug(logger *slog.Logger, data *runtime.ExecutionContextData) { - if !isDebugEnabled() || logger == nil || data == nil { - return - } - - attrs := []slog.Attr{ - slog.String("story", data.StoryInfo.StoryName), - slog.String("storyRun", data.StoryInfo.StoryRunID), - slog.String("step", data.StoryInfo.StepName), - slog.String("stepRun", data.StoryInfo.StepRunID), - slog.String("namespace", data.StoryInfo.StepRunNamespace), - slog.String("mode", data.Execution.Mode), - slog.Time("startedAt", data.StartedAt.Time), - } - if len(data.Inputs) > 0 { - attrs = append(attrs, - slog.Int("inputFields", len(data.Inputs)), - debugPreviewAttr("inputsPreview", data.Inputs), - ) - } - if len(data.Config) > 0 { - attrs = append(attrs, - slog.Int("configFields", len(data.Config)), - debugPreviewAttr("configPreview", data.Config), - ) - } - if len(data.Transports) > 0 { - names := make([]string, 0, len(data.Transports)) - for _, t := range data.Transports { - if strings.TrimSpace(t.Name) != "" { - names = append(names, strings.TrimSpace(t.Name)) - } - } - if len(names) > 0 { - sort.Strings(names) - attrs = append(attrs, slog.Any("transports", names)) - } - } - if len(data.Secrets) > 0 { - attrs = append(attrs, secretKeysAttr("secretKeys", data.Secrets)) - } - if data.Storage != nil { - attrs = append(attrs, slog.String("storageProvider", data.Storage.Provider)) - } - args := make([]any, 0, len(attrs)) - for _, attr := range attrs { - args = append(args, attr) - } - logger.Debug("Execution context hydrated", args...) -} - -func debugPreviewAttr(key string, value any) slog.Attr { - if value == nil { - return slog.String(key, "") - } - payload, err := json.Marshal(value) - if err != nil { - return slog.String(key, fmt.Sprintf("", err)) - } - return slog.String(key, truncateDebugPayload(payload)) -} - -func debugBytesAttr(key string, data []byte) slog.Attr { - if len(data) == 0 { - return slog.String(key, "") - } - return slog.String(key, truncateDebugPayload(data)) -} - -func secretKeysAttr(key string, secrets map[string]string) slog.Attr { - if len(secrets) == 0 { - return slog.Any(key, []string{}) - } - keys := make([]string, 0, len(secrets)) - for k := range secrets { - keys = append(keys, k) - } - sort.Strings(keys) - return slog.Any(key, keys) -} - -func truncateDebugPayload(data []byte) string { - if len(data) <= debugPreviewLimit { - return string(data) - } - return string(data[:debugPreviewLimit]) + "...(truncated)" -} diff --git a/effects.go b/effects.go deleted file mode 100644 index 49fe46e..0000000 --- a/effects.go +++ /dev/null @@ -1,774 +0,0 @@ -package sdk - -import ( - context "context" - "crypto/sha256" - "encoding/json" - "errors" - "fmt" - "os" - "strings" - "sync" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/refs" - runsidentity "github.com/bubustack/bobrapet/pkg/runs/identity" - "github.com/bubustack/bubu-sdk-go/k8s" - sdkenv "github.com/bubustack/bubu-sdk-go/pkg/env" - "github.com/bubustack/core/contracts" - "github.com/google/uuid" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// ErrEffectsUnavailable indicates that the current process cannot record effects (e.g., -// it is not running inside a StepRun workload). Callers may treat this as a soft -// failure and continue without recording the effect. -var ErrEffectsUnavailable = errors.New("effect recording unavailable: not running inside a StepRun") - -// ErrEffectAlreadyRecorded indicates that the requested effect key is already -// present in the current StepRun effect ledger. -var ErrEffectAlreadyRecorded = errors.New("effect already recorded") - -const ( - effectPatchTimeout = 3 * time.Second - defaultMaxEffectDetailBytes = 8 * 1024 - effectMaxDetailsBytesEnv = "BUBU_EFFECT_MAX_DETAILS_BYTES" - defaultEffectLeaseDuration = 10 * time.Minute -) - -var ( - effectEmitterMu sync.Mutex - effectEmitterInst *effectEmitter - effectEmitterErr error // only set for permanent errors (missing env) - effectClientFactory = func() (effectPatcher, error) { return k8s.SharedClient() } - effectReaderFactory = func() (effectReader, error) { return k8s.SharedClient() } - effectClaimFactory = func() (effectClaimClient, error) { return k8s.SharedClient() } - effectLeaseDuration = defaultEffectLeaseDuration - effectLeaseRenewIntervalFunc = func(duration time.Duration) time.Duration { - if duration <= 0 { - duration = defaultEffectLeaseDuration - } - interval := max(min(duration/3, 30*time.Second), 100*time.Millisecond) - return interval - } - effectExecMu sync.Mutex - effectExecGates = make(map[string]*effectExecutionGate) -) - -type effectPatcher interface { - PatchStepRunStatus(ctx context.Context, stepRunName string, status runsv1alpha1.StepRunStatus) error -} - -type effectReader interface { - client.Reader - GetNamespace() string -} - -type effectClaimClient interface { - client.Reader - client.Writer - GetNamespace() string -} - -type effectEmitter struct { - client effectPatcher - stepRunID string -} - -type effectExecutionGate struct { - mu sync.Mutex - refs int - completed bool -} - -type effectExecutionLease struct { - key string - gate *effectExecutionGate -} - -type effectReservation struct { - client effectClaimClient - claim *runsv1alpha1.EffectClaim - holderIdentity string -} - -type effectReservationRenewer struct { - stopOnce sync.Once - stopCh chan struct{} - doneCh chan struct{} - - mu sync.Mutex - err error -} - -// RecordEffect appends an effect record to the current StepRun status ledger. -// The effect sequence is assigned server-side when Seq is 0. -func RecordEffect(ctx context.Context, key, status string, details any) error { - return recordEffect(ctx, key, status, details, 0) -} - -// HasEffect returns true if the current StepRun already recorded an effect for the key. -func HasEffect(ctx context.Context, key string) (bool, error) { - key = strings.TrimSpace(key) - if key == "" { - return false, fmt.Errorf("effect key is required") - } - stepRun, _, err := getCurrentStepRun(ctx) - if err != nil { - return false, err - } - for _, eff := range stepRun.Status.Effects { - if strings.TrimSpace(eff.Key) == key { - return true, nil - } - } - return false, nil -} - -// ExecuteEffectOnce runs fn only if the effect key has not been recorded yet. -// It records a successful effect with the returned details. When the effect -// already exists, it returns `already=true` with `ErrEffectAlreadyRecorded`. -func ExecuteEffectOnce(ctx context.Context, key string, fn func(context.Context) (any, error)) (any, bool, error) { - key = strings.TrimSpace(key) - if key == "" { - return nil, false, fmt.Errorf("effect key is required") - } - if fn == nil { - return nil, false, fmt.Errorf("effect function is required") - } - lease, already := acquireEffectExecutionLease(effectExecutionKey(key)) - if already { - return nil, true, ErrEffectAlreadyRecorded - } - completed := false - defer func() { - releaseEffectExecutionLease(lease, completed) - }() - reservation, already, err := reserveEffect(ctx, key) - if err != nil { - return nil, false, err - } - if already { - completed = true - return nil, true, ErrEffectAlreadyRecorded - } - execCtx := ctx - execCancel := func() {} //nolint:ineffassign,staticcheck - if execCtx == nil { - execCtx = context.Background() - } - execCtx, execCancel = context.WithCancel(execCtx) - defer execCancel() - - stopRenewal := startEffectReservationRenewal(reservation, execCancel) - result, err := fn(execCtx) - renewErr := stopRenewal() - if err != nil { - _ = releaseEffectReservation(ctx, reservation) - if renewErr != nil { - err = errors.Join(err, renewErr) - } - return result, false, err - } - if renewErr != nil { - _ = releaseEffectReservation(ctx, reservation) - return result, false, renewErr - } - // Mark the in-process gate as completed as soon as the effect function - // succeeds so retries do not execute the side effect again if persistence - // fails afterward. - completed = true - if err := completeEffectReservation(ctx, reservation, result); err != nil { - return result, false, err - } - if err := RecordEffect(ctx, key, "succeeded", result); err != nil { - return result, false, err - } - return result, false, nil -} - -func recordEffect(ctx context.Context, key, status string, details any, seq uint64) error { - key = strings.TrimSpace(key) - if key == "" { - return fmt.Errorf("effect key is required") - } - emitter, err := getEffectEmitter() - if err != nil { - return err - } - - detailRaw, err := marshalEffectDetails(details) - if err != nil { - return err - } - - now := metav1.NewTime(time.Now().UTC()) - patch := runsv1alpha1.StepRunStatus{ - Effects: []runsv1alpha1.EffectRecord{{ - Seq: seq, - Key: key, - Status: strings.TrimSpace(status), - EmittedAt: &now, - Details: detailRaw, - }}, - } - return emitter.emit(ctx, patch) -} - -func marshalEffectDetails(details any) (*runtime.RawExtension, error) { - if details == nil { - return nil, nil - } - raw, err := json.Marshal(details) - if err != nil { - return nil, fmt.Errorf("failed to marshal effect details: %w", err) - } - maxBytes := resolveEffectMaxDetailsBytes() - if maxBytes > 0 && len(raw) > maxBytes { - details = truncateEffectDetails(details, raw) - raw, err = json.Marshal(details) - if err != nil { - return nil, fmt.Errorf("failed to marshal truncated effect details: %w", err) - } - if len(raw) > maxBytes { - return nil, fmt.Errorf("truncated effect details exceed %d bytes", maxBytes) - } - } - value := runtime.RawExtension{Raw: raw} - return &value, nil -} - -func truncateEffectDetails(value any, raw []byte) map[string]any { - meta := map[string]any{ - "truncated": true, - "sizeBytes": len(raw), - } - if len(raw) > 0 { - sum := sha256.Sum256(raw) - meta["sha256"] = fmt.Sprintf("%x", sum) - } - kind, details := signalTypeSummary(value) - meta["type"] = kind - if len(details) > 0 { - meta["details"] = details - } - return meta -} - -func resolveEffectMaxDetailsBytes() int { - return sdkenv.GetInt(effectMaxDetailsBytesEnv, defaultMaxEffectDetailBytes) -} - -func getCurrentStepRun(ctx context.Context) (*runsv1alpha1.StepRun, string, error) { - stepRunName := strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) - if stepRunName == "" { - return nil, "", ErrEffectsUnavailable - } - reader, err := effectReaderFactory() - if err != nil { - return nil, "", fmt.Errorf("failed to initialize effect reader: %w", err) - } - namespace := strings.TrimSpace(os.Getenv(contracts.StepRunNamespaceEnv)) - if namespace == "" { - namespace = strings.TrimSpace(reader.GetNamespace()) - } - if namespace == "" { - return nil, "", ErrEffectsUnavailable - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, effectPatchTimeout) - defer cancel() - - var stepRun runsv1alpha1.StepRun - if err := reader.Get(ctx, types.NamespacedName{Name: stepRunName, Namespace: namespace}, &stepRun); err != nil { - return nil, "", fmt.Errorf("failed to fetch StepRun for effect lookup: %w", err) - } - return &stepRun, namespace, nil -} - -func effectExecutionKey(key string) string { - return strings.TrimSpace(os.Getenv(contracts.StepRunNamespaceEnv)) + - ":" + strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) + - ":" + strings.TrimSpace(key) -} - -func acquireEffectExecutionLease(key string) (*effectExecutionLease, bool) { - effectExecMu.Lock() - gate := effectExecGates[key] - if gate == nil { - gate = &effectExecutionGate{} - effectExecGates[key] = gate - } - gate.refs++ - effectExecMu.Unlock() - - gate.mu.Lock() - if gate.completed { - effectExecMu.Lock() - gate.refs-- - if gate.refs == 0 { - delete(effectExecGates, key) - } - effectExecMu.Unlock() - gate.mu.Unlock() - return nil, true - } - return &effectExecutionLease{key: key, gate: gate}, false -} - -func releaseEffectExecutionLease(lease *effectExecutionLease, completed bool) { - if lease == nil || lease.gate == nil { - return - } - effectExecMu.Lock() - if completed { - lease.gate.completed = true - } - lease.gate.refs-- - if lease.gate.refs == 0 { - delete(effectExecGates, lease.key) - } - effectExecMu.Unlock() - lease.gate.mu.Unlock() -} - -func reserveEffect(ctx context.Context, key string) (*effectReservation, bool, error) { - stepRun, namespace, err := getCurrentStepRun(ctx) - if err != nil { - return nil, false, err - } - for _, eff := range stepRun.Status.Effects { - if strings.TrimSpace(eff.Key) == key { - return nil, true, nil - } - } - claimClient, err := effectClaimFactory() - if err != nil { - return nil, false, fmt.Errorf("failed to initialize effect reservation client: %w", err) - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, effectPatchTimeout) - defer cancel() - - claim := newEffectReservationClaim(stepRun, namespace, key) - if err := claimClient.Create(ctx, claim); err == nil { - return &effectReservation{ - client: claimClient, - claim: claim, - holderIdentity: strings.TrimSpace(claim.Spec.HolderIdentity), - }, false, nil - } else if !apierrors.IsAlreadyExists(err) { - return nil, false, fmt.Errorf("failed to create effect reservation: %w", err) - } - - existing := &runsv1alpha1.EffectClaim{} - if err := claimClient.Get(ctx, types.NamespacedName{Name: claim.Name, Namespace: claim.Namespace}, existing); err != nil { //nolint:lll - return nil, false, fmt.Errorf("failed to fetch effect reservation: %w", err) - } - if !effectReservationMatchesStepRun(existing, stepRun, key) { - return nil, false, fmt.Errorf("existing effect reservation %s/%s does not match StepRun/effect identity", existing.Namespace, existing.Name) //nolint:lll - } - if effectReservationState(existing) == runsv1alpha1.EffectClaimCompletionStatusCompleted { - return nil, true, nil - } - if effectReservationAvailableForReuse(existing) { - return acquireExistingEffectReservation(ctx, claimClient, existing, stepRun, namespace, key, false) - } - if effectReservationIsStale(existing, time.Now().UTC()) { - return acquireExistingEffectReservation(ctx, claimClient, existing, stepRun, namespace, key, true) - } - return nil, true, nil -} - -func newEffectReservationClaim(stepRun *runsv1alpha1.StepRun, namespace, key string) *runsv1alpha1.EffectClaim { - now := metav1.NewMicroTime(time.Now().UTC()) - durationSeconds := int32(effectLeaseDuration / time.Second) - holderIdentity := newEffectReservationHolderIdentity(namespace, stepRun.Name, key) - idempotencyKey := strings.TrimSpace(stepRun.Spec.IdempotencyKey) - claim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName(namespace, stepRun.Name, key), - Namespace: namespace, - OwnerReferences: []metav1.OwnerReference{{ - APIVersion: runsv1alpha1.GroupVersion.String(), - Kind: "StepRun", - Name: stepRun.Name, - UID: stepRun.UID, - }}, - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: stepRun.Name}, - UID: &stepRun.UID, - }, - EffectKey: key, - IdempotencyKey: idempotencyKey, - HolderIdentity: holderIdentity, - AcquireTime: &now, - RenewTime: &now, - LeaseDurationSeconds: durationSeconds, - }, - } - return claim -} - -func effectReservationClaimName(namespace, stepRunName, key string) string { - return runsidentity.DeriveEffectClaimName(namespace, stepRunName, key) -} - -func effectReservationState(claim *runsv1alpha1.EffectClaim) runsv1alpha1.EffectClaimCompletionStatus { - if claim == nil { - return "" - } - return claim.Spec.CompletionStatus -} - -func newEffectReservationHolderIdentity(namespace, stepRunName, key string) string { - return fmt.Sprintf("%s:%s:%s:%s", namespace, stepRunName, key, uuid.NewString()) -} - -func effectReservationAvailableForReuse(claim *runsv1alpha1.EffectClaim) bool { - if claim == nil { - return false - } - if effectReservationState(claim) == runsv1alpha1.EffectClaimCompletionStatusCompleted { - return false - } - return strings.TrimSpace(claim.Spec.HolderIdentity) == "" -} - -func effectReservationIsStale(claim *runsv1alpha1.EffectClaim, now time.Time) bool { - if claim == nil { - return false - } - expiresAt := effectReservationExpiresAt(claim) - if expiresAt.IsZero() { - return false - } - return !now.Before(expiresAt) -} - -func effectReservationExpiresAt(claim *runsv1alpha1.EffectClaim) time.Time { - if claim == nil { - return time.Time{} - } - duration := effectLeaseDuration - if claim.Spec.LeaseDurationSeconds > 0 { - duration = time.Duration(claim.Spec.LeaseDurationSeconds) * time.Second - } - switch { - case claim.Spec.RenewTime != nil: - return claim.Spec.RenewTime.Add(duration) - case claim.Spec.AcquireTime != nil: - return claim.Spec.AcquireTime.Add(duration) - default: - return claim.CreationTimestamp.Add(duration) - } -} - -func effectReservationOwnedBy(reservation *effectReservation, claim *runsv1alpha1.EffectClaim) bool { - if reservation == nil || claim == nil { - return false - } - return strings.TrimSpace(reservation.holderIdentity) != "" && - strings.TrimSpace(claim.Spec.HolderIdentity) == strings.TrimSpace(reservation.holderIdentity) -} - -func startEffectReservationRenewal(reservation *effectReservation, cancel context.CancelFunc) func() error { - if reservation == nil || reservation.client == nil || reservation.claim == nil { - return func() error { return nil } - } - duration := effectLeaseDuration - if reservation.claim.Spec.LeaseDurationSeconds > 0 { - duration = time.Duration(reservation.claim.Spec.LeaseDurationSeconds) * time.Second - } - interval := effectLeaseRenewIntervalFunc(duration) - if interval <= 0 { - return func() error { return nil } - } - - renewer := &effectReservationRenewer{ - stopCh: make(chan struct{}), - doneCh: make(chan struct{}), - } - go func() { - defer close(renewer.doneCh) - ticker := time.NewTicker(interval) - defer ticker.Stop() - for { - select { - case <-renewer.stopCh: - return - case <-ticker.C: - if err := renewEffectReservation(context.Background(), reservation); err != nil { - renewer.mu.Lock() - if renewer.err == nil { - renewer.err = err - } - renewer.mu.Unlock() - if cancel != nil { - cancel() - } - return - } - } - } - }() - - return func() error { - renewer.stopOnce.Do(func() { - close(renewer.stopCh) - }) - <-renewer.doneCh - renewer.mu.Lock() - defer renewer.mu.Unlock() - return renewer.err - } -} - -func renewEffectReservation(ctx context.Context, reservation *effectReservation) error { - if reservation == nil || reservation.client == nil || reservation.claim == nil { - return nil - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, effectPatchTimeout) - defer cancel() - - current := &runsv1alpha1.EffectClaim{} - if err := reservation.client.Get(ctx, types.NamespacedName{ - Name: reservation.claim.Name, - Namespace: reservation.claim.Namespace, - }, current); err != nil { - return fmt.Errorf("failed to fetch effect reservation for renewal: %w", err) - } - if !effectReservationOwnedBy(reservation, current) { - if effectReservationState(current) == runsv1alpha1.EffectClaimCompletionStatusCompleted { - reservation.claim = current - return fmt.Errorf("effect reservation completed by another holder before renewal") - } - return fmt.Errorf("effect reservation ownership lost before renewal") - } - - updated := current.DeepCopy() - now := metav1.NewMicroTime(time.Now().UTC()) - if updated.Spec.AcquireTime == nil { - updated.Spec.AcquireTime = &now - } - updated.Spec.RenewTime = &now - if err := reservation.client.Update(ctx, updated); err != nil { - return fmt.Errorf("failed to renew effect reservation: %w", err) - } - reservation.claim = updated - return nil -} - -func acquireExistingEffectReservation( - ctx context.Context, - claimClient effectClaimClient, - existing *runsv1alpha1.EffectClaim, - stepRun *runsv1alpha1.StepRun, - namespace, key string, - incrementTransitions bool, -) (*effectReservation, bool, error) { - holderIdentity := newEffectReservationHolderIdentity(namespace, stepRun.Name, key) - current := existing.DeepCopy() - for range 3 { - if effectReservationState(current) == runsv1alpha1.EffectClaimCompletionStatusCompleted { - return nil, true, nil - } - if !incrementTransitions && !effectReservationAvailableForReuse(current) { - return nil, true, nil - } - if incrementTransitions && !effectReservationIsStale(current, time.Now().UTC()) { - return nil, true, nil - } - - updated := current.DeepCopy() - now := metav1.NewMicroTime(time.Now().UTC()) - durationSeconds := int32(effectLeaseDuration / time.Second) - if updated.Spec.LeaseDurationSeconds <= 0 { - updated.Spec.LeaseDurationSeconds = durationSeconds - } - updated.Spec.HolderIdentity = holderIdentity - updated.Spec.AcquireTime = &now - updated.Spec.RenewTime = &now - if incrementTransitions { - updated.Spec.LeaseTransitions = current.Spec.LeaseTransitions + 1 - } - if err := claimClient.Update(ctx, updated); err == nil { - return &effectReservation{ - client: claimClient, - claim: updated, - holderIdentity: holderIdentity, - }, false, nil - } else if !apierrors.IsConflict(err) { - return nil, false, fmt.Errorf("failed to recover stale effect reservation: %w", err) - } - - refreshed := &runsv1alpha1.EffectClaim{} - if err := claimClient.Get(ctx, types.NamespacedName{Name: current.Name, Namespace: current.Namespace}, refreshed); err != nil { //nolint:lll - return nil, false, fmt.Errorf("failed to refetch stale effect reservation: %w", err) - } - current = refreshed - } - return nil, true, nil -} - -func completeEffectReservation(ctx context.Context, reservation *effectReservation, details any) error { - if reservation == nil || reservation.client == nil || reservation.claim == nil { - return nil - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, effectPatchTimeout) - defer cancel() - - current := &runsv1alpha1.EffectClaim{} - if err := reservation.client.Get(ctx, types.NamespacedName{ - Name: reservation.claim.Name, - Namespace: reservation.claim.Namespace, - }, current); err != nil { - return fmt.Errorf("failed to fetch effect reservation before completion: %w", err) - } - if !effectReservationOwnedBy(reservation, current) { - if effectReservationState(current) == runsv1alpha1.EffectClaimCompletionStatusCompleted { - reservation.claim = current - return nil - } - return fmt.Errorf("effect reservation ownership lost before completion") - } - - updated := current.DeepCopy() - detailRaw, err := marshalEffectDetails(details) - if err != nil { - return err - } - now := time.Now().UTC() - renewedAt := metav1.NewMicroTime(now) - completedAt := metav1.NewTime(now) - updated.Spec.CompletionStatus = runsv1alpha1.EffectClaimCompletionStatusCompleted - updated.Spec.CompletedAt = &completedAt - updated.Spec.RenewTime = &renewedAt - updated.Spec.Details = detailRaw - if err := reservation.client.Update(ctx, updated); err != nil { - return fmt.Errorf("failed to complete effect reservation: %w", err) - } - reservation.claim = updated - return nil -} - -func releaseEffectReservation(ctx context.Context, reservation *effectReservation) error { - if reservation == nil || reservation.client == nil || reservation.claim == nil { - return nil - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, effectPatchTimeout) - defer cancel() - - current := &runsv1alpha1.EffectClaim{} - if err := reservation.client.Get(ctx, types.NamespacedName{ - Name: reservation.claim.Name, - Namespace: reservation.claim.Namespace, - }, current); err != nil { - if apierrors.IsNotFound(err) { - return nil - } - return fmt.Errorf("failed to fetch effect reservation before release: %w", err) - } - if !effectReservationOwnedBy(reservation, current) { - return nil - } - updated := current.DeepCopy() - updated.Spec.HolderIdentity = "" - updated.Spec.AcquireTime = nil - updated.Spec.RenewTime = nil - updated.Spec.CompletionStatus = runsv1alpha1.EffectClaimCompletionStatusReleased - updated.Spec.CompletedAt = nil - updated.Spec.Details = nil - if err := reservation.client.Update(ctx, updated); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("failed to release effect reservation: %w", err) - } - return nil -} - -func effectReservationMatchesStepRun(claim *runsv1alpha1.EffectClaim, stepRun *runsv1alpha1.StepRun, key string) bool { - if claim == nil || stepRun == nil { - return false - } - if strings.TrimSpace(claim.Spec.EffectKey) != strings.TrimSpace(key) { - return false - } - if strings.TrimSpace(claim.Spec.StepRunRef.Name) != strings.TrimSpace(stepRun.Name) { - return false - } - if claim.Spec.StepRunRef.UID != nil && *claim.Spec.StepRunRef.UID != stepRun.UID { - return false - } - stepRunKey := strings.TrimSpace(stepRun.Spec.IdempotencyKey) - claimKey := strings.TrimSpace(claim.Spec.IdempotencyKey) - return claimKey == "" || claimKey == stepRunKey -} - -func getEffectEmitter() (*effectEmitter, error) { - effectEmitterMu.Lock() - defer effectEmitterMu.Unlock() - if effectEmitterInst != nil { - return effectEmitterInst, nil - } - if effectEmitterErr != nil { - // Permanent error (e.g. missing env) — don't retry. - return nil, effectEmitterErr - } - stepRunID := strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) - if stepRunID == "" { - effectEmitterErr = ErrEffectsUnavailable - return nil, effectEmitterErr - } - client, err := effectClientFactory() //nolint:revive - if err != nil { - // Transient error — don't cache, allow retry on next call. - return nil, fmt.Errorf("failed to initialize effect client: %w", err) - } - effectEmitterInst = &effectEmitter{ - client: client, - stepRunID: stepRunID, - } - return effectEmitterInst, nil -} - -func (e *effectEmitter) emit(ctx context.Context, patch runsv1alpha1.StepRunStatus) error { - if e == nil { - return ErrEffectsUnavailable - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, effectPatchTimeout) - defer cancel() - return e.client.PatchStepRunStatus(ctx, e.stepRunID, patch) -} - -// testResetEffectEmitter resets the cached emitter between tests. -func testResetEffectEmitter() { - effectEmitterMu.Lock() - defer effectEmitterMu.Unlock() - effectEmitterInst = nil - effectEmitterErr = nil - effectExecMu.Lock() - defer effectExecMu.Unlock() - effectExecGates = make(map[string]*effectExecutionGate) -} diff --git a/effects_integration_test.go b/effects_integration_test.go deleted file mode 100644 index f0fcc34..0000000 --- a/effects_integration_test.go +++ /dev/null @@ -1,370 +0,0 @@ -//go:build integration - -package sdk - -import ( - context "context" - "os" - "path/filepath" - "strings" - "sync" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/refs" - "github.com/bubustack/core/contracts" - "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/envtest" -) - -type envtestEffectClient struct { - client.Client - namespace string -} - -func (c *envtestEffectClient) GetNamespace() string { - return c.namespace -} - -func (c *envtestEffectClient) PatchStepRunStatus(ctx context.Context, stepRunName string, status runsv1alpha1.StepRunStatus) error { - var stepRun runsv1alpha1.StepRun - if err := c.Get(ctx, client.ObjectKey{Name: stepRunName, Namespace: c.namespace}, &stepRun); err != nil { - return err - } - base := stepRun.DeepCopy() - stepRun.Status.Effects = append(stepRun.Status.Effects, status.Effects...) - return c.Status().Patch(ctx, &stepRun, client.MergeFrom(base)) -} - -func resolveEffectsCRDPath(t *testing.T) string { - t.Helper() - - if override := os.Getenv("BOBRAPET_CRD_PATH"); override != "" { - if info, err := os.Stat(override); err == nil && info.IsDir() { - return override - } - t.Fatalf("BOBRAPET_CRD_PATH=%q does not exist or is not a directory", override) - } - - candidates := []string{ - filepath.Join("..", "bobrapet", "config", "crd", "bases"), - filepath.Join("..", "..", "bobrapet", "config", "crd", "bases"), - } - for _, candidate := range candidates { - if info, err := os.Stat(candidate); err == nil && info.IsDir() { - return candidate - } - } - - t.Skip("bobrapet CRDs not found; set BOBRAPET_CRD_PATH or run tests within the bobrapet+bubu-sdk-go workspace") - return "" -} - -func setupEffectsEnvtest(t *testing.T) *envtestEffectClient { - t.Helper() - if testing.Short() { - t.Skip("skipping integration envtest in short mode") - } - if os.Getenv("KUBEBUILDER_ASSETS") == "" { - t.Skip("KUBEBUILDER_ASSETS not set; skipping integration envtest") - } - - testEnv := &envtest.Environment{ - CRDDirectoryPaths: []string{resolveEffectsCRDPath(t)}, - } - cfg, err := testEnv.Start() - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, testEnv.Stop()) - }) - - scheme := runtime.NewScheme() - require.NoError(t, clientgoscheme.AddToScheme(scheme)) - require.NoError(t, runsv1alpha1.AddToScheme(scheme)) - - apiClient, err := client.New(cfg, client.Options{Scheme: scheme}) - require.NoError(t, err) - - const namespace = "default" - err = apiClient.Create(context.Background(), &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{Name: namespace}, - }) - if err != nil && !apierrors.IsAlreadyExists(err) { - require.NoError(t, err) - } - - return &envtestEffectClient{Client: apiClient, namespace: namespace} -} - -func newIntegrationStepRun(name, namespace string) *runsv1alpha1.StepRun { - return &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - Spec: runsv1alpha1.StepRunSpec{ - StoryRunRef: refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: "storyrun-" + name}, - }, - StepID: "step-1", - }, - } -} - -func TestExecuteEffectOnce_RecoversExpiredClaimLive(t *testing.T) { - effectClient := setupEffectsEnvtest(t) - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - const stepRunName = "effect-live-recover" - t.Setenv(contracts.StepRunNameEnv, stepRunName) - t.Setenv(contracts.StepRunNamespaceEnv, effectClient.namespace) - - stepRun := newIntegrationStepRun(stepRunName, effectClient.namespace) - require.NoError(t, effectClient.Create(context.Background(), stepRun)) - - staleAt := metav1.NewMicroTime(time.Now().Add(-2 * time.Second).UTC()) - durationSeconds := int32(1) - claim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName(effectClient.namespace, stepRunName, "effect-live"), - Namespace: effectClient.namespace, - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: stepRunName}, - UID: &stepRun.UID, - }, - EffectKey: "effect-live", - HolderIdentity: "old-holder", - AcquireTime: &staleAt, - RenewTime: &staleAt, - LeaseDurationSeconds: durationSeconds, - }, - } - require.NoError(t, effectClient.Create(context.Background(), claim)) - - effectReaderFactory = func() (effectReader, error) { return effectClient, nil } - effectClientFactory = func() (effectPatcher, error) { return effectClient, nil } - effectClaimFactory = func() (effectClaimClient, error) { return effectClient, nil } - - result, already, err := ExecuteEffectOnce(context.Background(), "effect-live", func(context.Context) (any, error) { - return map[string]any{"providerId": "live"}, nil - }) - require.NoError(t, err) - require.False(t, already) - require.NotNil(t, result) - - var updatedClaim runsv1alpha1.EffectClaim - require.NoError(t, effectClient.Get(context.Background(), client.ObjectKey{Name: claim.Name, Namespace: claim.Namespace}, &updatedClaim)) - require.Equal(t, runsv1alpha1.EffectClaimCompletionStatusCompleted, effectReservationState(&updatedClaim)) - - var updatedStepRun runsv1alpha1.StepRun - require.NoError(t, effectClient.Get(context.Background(), client.ObjectKey{Name: stepRunName, Namespace: effectClient.namespace}, &updatedStepRun)) - require.Len(t, updatedStepRun.Status.Effects, 1) - require.Equal(t, "effect-live", updatedStepRun.Status.Effects[0].Key) -} - -func TestReserveEffect_OnlyOneWorkerRecoversStaleReservationLive(t *testing.T) { - effectClient := setupEffectsEnvtest(t) - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - const stepRunName = "effect-live-race" - t.Setenv(contracts.StepRunNameEnv, stepRunName) - t.Setenv(contracts.StepRunNamespaceEnv, effectClient.namespace) - - stepRun := newIntegrationStepRun(stepRunName, effectClient.namespace) - require.NoError(t, effectClient.Create(context.Background(), stepRun)) - - staleAt := metav1.NewMicroTime(time.Now().Add(-2 * time.Second).UTC()) - durationSeconds := int32(1) - claim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName(effectClient.namespace, stepRunName, "effect-race-live"), - Namespace: effectClient.namespace, - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: stepRunName}, - UID: &stepRun.UID, - }, - EffectKey: "effect-race-live", - HolderIdentity: "old-holder", - AcquireTime: &staleAt, - RenewTime: &staleAt, - LeaseDurationSeconds: durationSeconds, - }, - } - require.NoError(t, effectClient.Create(context.Background(), claim)) - - effectReaderFactory = func() (effectReader, error) { return effectClient, nil } - effectClaimFactory = func() (effectClaimClient, error) { return effectClient, nil } - - type result struct { - reservation *effectReservation - already bool - err error - } - results := make(chan result, 2) - var wg sync.WaitGroup - wg.Add(2) - run := func() { - defer wg.Done() - reservation, already, err := reserveEffect(context.Background(), "effect-race-live") - results <- result{reservation: reservation, already: already, err: err} - } - - go run() - go run() - wg.Wait() - close(results) - - successes := 0 - alreadyCount := 0 - for item := range results { - require.NoError(t, item.err) - if item.already { - alreadyCount++ - continue - } - successes++ - require.NotNil(t, item.reservation) - } - require.Equal(t, 1, successes) - require.Equal(t, 1, alreadyCount) - - var updatedClaim runsv1alpha1.EffectClaim - require.NoError(t, effectClient.Get(context.Background(), client.ObjectKey{Name: claim.Name, Namespace: claim.Namespace}, &updatedClaim)) - require.Equal(t, runsv1alpha1.EffectClaimCompletionStatus(""), effectReservationState(&updatedClaim)) - require.NotEqual(t, "old-holder", strings.TrimSpace(updatedClaim.Spec.HolderIdentity)) -} - -func TestReleaseEffectReservation_DoesNotClearRecoveredClaimLive(t *testing.T) { - effectClient := setupEffectsEnvtest(t) - - claim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName(effectClient.namespace, "step-1", "effect-live-release-guard"), - Namespace: effectClient.namespace, - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: "step-1"}, - }, - EffectKey: "effect-live-release-guard", - HolderIdentity: "current-holder", - }, - } - require.NoError(t, effectClient.Create(context.Background(), claim)) - - err := releaseEffectReservation(context.Background(), &effectReservation{ - client: effectClient, - claim: claim.DeepCopy(), - holderIdentity: "stale-holder", - }) - require.NoError(t, err) - - var remaining runsv1alpha1.EffectClaim - err = effectClient.Get(context.Background(), client.ObjectKey{Name: claim.Name, Namespace: claim.Namespace}, &remaining) - require.NoError(t, err) - require.Equal(t, "current-holder", strings.TrimSpace(remaining.Spec.HolderIdentity)) -} - -func TestExecuteEffectOnce_RenewsReservationDuringLongExecutionLive(t *testing.T) { - effectClient := setupEffectsEnvtest(t) - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - prevLeaseDuration := effectLeaseDuration - prevRenewInterval := effectLeaseRenewIntervalFunc - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - effectLeaseDuration = prevLeaseDuration - effectLeaseRenewIntervalFunc = prevRenewInterval - testResetEffectEmitter() - }) - - effectLeaseDuration = 150 * time.Millisecond - effectLeaseRenewIntervalFunc = func(time.Duration) time.Duration { return 30 * time.Millisecond } - - const stepRunName = "effect-live-renew" - t.Setenv(contracts.StepRunNameEnv, stepRunName) - t.Setenv(contracts.StepRunNamespaceEnv, effectClient.namespace) - - stepRun := newIntegrationStepRun(stepRunName, effectClient.namespace) - require.NoError(t, effectClient.Create(context.Background(), stepRun)) - - effectReaderFactory = func() (effectReader, error) { return effectClient, nil } - effectClientFactory = func() (effectPatcher, error) { return effectClient, nil } - effectClaimFactory = func() (effectClaimClient, error) { return effectClient, nil } - - started := make(chan struct{}) - release := make(chan struct{}) - errCh := make(chan error, 1) - go func() { - _, _, err := ExecuteEffectOnce(context.Background(), "effect-live-renew", func(context.Context) (any, error) { - close(started) - <-release - return map[string]any{"providerId": "renew-live"}, nil - }) - errCh <- err - }() - - select { - case <-started: - case <-time.After(2 * time.Second): - t.Fatal("expected effect execution to start") - } - - claimName := effectReservationClaimName(effectClient.namespace, stepRunName, "effect-live-renew") - require.Eventually(t, func() bool { - var claim runsv1alpha1.EffectClaim - if err := effectClient.Get(context.Background(), client.ObjectKey{Name: claimName, Namespace: effectClient.namespace}, &claim); err != nil { - return false - } - return claim.Spec.AcquireTime != nil && claim.Spec.RenewTime != nil && - claim.Spec.RenewTime.Time.After(claim.Spec.AcquireTime.Time) - }, 2*time.Second, 20*time.Millisecond, "expected live claim renew time to advance while effect is running") - - reservation, already, err := reserveEffect(context.Background(), "effect-live-renew") - require.NoError(t, err) - require.True(t, already) - require.Nil(t, reservation) - - close(release) - - select { - case err := <-errCh: - require.NoError(t, err) - case <-time.After(2 * time.Second): - t.Fatal("expected ExecuteEffectOnce to finish after release") - } -} diff --git a/effects_test.go b/effects_test.go deleted file mode 100644 index 8726c70..0000000 --- a/effects_test.go +++ /dev/null @@ -1,953 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - context "context" - "encoding/json" - "errors" - "os" - "strings" - "sync" - "sync/atomic" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/refs" - "github.com/bubustack/core/contracts" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - k8sruntime "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -type mockEffectPatcher struct { - mock.Mock -} - -func (m *mockEffectPatcher) PatchStepRunStatus(ctx context.Context, - stepRunName string, status runsv1alpha1.StepRunStatus) error { - args := m.Called(ctx, stepRunName, status) - return args.Error(0) -} - -type mockEffectReader struct { - stepRun *runsv1alpha1.StepRun - namespace string - err error -} - -func (m *mockEffectReader) Get(_ context.Context, _ client.ObjectKey, obj client.Object, _ ...client.GetOption) error { - if m.err != nil { - return m.err - } - if m.stepRun == nil { - return errors.New("step run not found") - } - target := obj.(*runsv1alpha1.StepRun) - *target = *m.stepRun - return nil -} - -func (m *mockEffectReader) List(_ context.Context, _ client.ObjectList, _ ...client.ListOption) error { - if m.err != nil { - return m.err - } - return nil -} - -func (m *mockEffectReader) GetNamespace() string { - return m.namespace -} - -type fakeEffectClusterClient struct { - client.Client - namespace string -} - -func newFakeEffectClusterClient(t *testing.T, namespace string, objects ...client.Object) *fakeEffectClusterClient { - t.Helper() - scheme := k8sruntime.NewScheme() - if err := runsv1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("add StepRun scheme: %v", err) - } - kubeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(objects...). - Build() - return &fakeEffectClusterClient{Client: kubeClient, namespace: namespace} -} - -func (c *fakeEffectClusterClient) GetNamespace() string { - return c.namespace -} - -func (c *fakeEffectClusterClient) PatchStepRunStatus(ctx context.Context, stepRunName string, status runsv1alpha1.StepRunStatus) error { //nolint:lll - var stepRun runsv1alpha1.StepRun - if err := c.Get(ctx, client.ObjectKey{Name: stepRunName, Namespace: c.namespace}, &stepRun); err != nil { - return err - } - base := stepRun.DeepCopy() - stepRun.Status.Effects = append(stepRun.Status.Effects, status.Effects...) - return c.Status().Patch(ctx, &stepRun, client.MergeFrom(base)) -} - -func TestRecordEffect_EmitsPatch(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetEffectEmitter() - - prevFactory := effectClientFactory - mockClient := &mockEffectPatcher{} - effectClientFactory = func() (effectPatcher, error) { return mockClient, nil } - t.Cleanup(func() { - effectClientFactory = prevFactory - testResetEffectEmitter() - }) - - mockClient.On("PatchStepRunStatus", mock.Anything, "step-1", mock.MatchedBy(func(status runsv1alpha1.StepRunStatus) bool { //nolint:lll - if len(status.Effects) != 1 { - return false - } - eff := status.Effects[0] - if eff.Key != "effect-1" || eff.Status != "succeeded" { - return false - } - if eff.EmittedAt == nil || eff.EmittedAt.IsZero() { - return false - } - if eff.Details == nil || len(eff.Details.Raw) == 0 { - return false - } - var details map[string]any - if err := json.Unmarshal(eff.Details.Raw, &details); err != nil { - return false - } - return details["providerId"] == "abc" - })).Return(nil) - - err := RecordEffect(context.Background(), "effect-1", "succeeded", map[string]any{"providerId": "abc"}) - assert.NoError(t, err) - mockClient.AssertExpectations(t) -} - -func TestRecordEffect_MissingStepRun(t *testing.T) { - testResetEffectEmitter() - prevFactory := effectClientFactory - t.Cleanup(func() { - effectClientFactory = prevFactory - testResetEffectEmitter() - }) - - err := RecordEffect(context.Background(), "effect-1", "succeeded", nil) - assert.ErrorIs(t, err, ErrEffectsUnavailable) -} - -func TestRecordEffect_MissingKey(t *testing.T) { - prevFactory := effectClientFactory - t.Cleanup(func() { - effectClientFactory = prevFactory - testResetEffectEmitter() - }) - - err := RecordEffect(context.Background(), " ", "succeeded", nil) - assert.Error(t, err) -} - -func TestExecuteEffectOnce_SkipsWhenRecorded(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - testResetEffectEmitter() - }) - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{ - namespace: "test-ns", - stepRun: &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{ - Effects: []runsv1alpha1.EffectRecord{{Seq: 1, Key: "effect-1", Status: "succeeded"}}, - }, - }, - }, nil - } - mockClient := &mockEffectPatcher{} - effectClientFactory = func() (effectPatcher, error) { return mockClient, nil } - - result, already, err := ExecuteEffectOnce(context.Background(), "effect-1", func(context.Context) (any, error) { - t.Fatalf("effect should not execute when already recorded") - return nil, nil - }) - assert.Nil(t, result) - assert.True(t, already) - assert.ErrorIs(t, err, ErrEffectAlreadyRecorded) - mockClient.AssertNotCalled(t, "PatchStepRunStatus", mock.Anything, mock.Anything, mock.Anything) -} - -func TestExecuteEffectOnce_RecordsOnSuccess(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{ - namespace: "test-ns", - stepRun: &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - }, - }, nil - } - claimClient := newFakeEffectClusterClient(t, "test-ns") - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - mockClient := &mockEffectPatcher{} - effectClientFactory = func() (effectPatcher, error) { return mockClient, nil } - - mockClient.On("PatchStepRunStatus", mock.Anything, "step-1", mock.MatchedBy(func(status runsv1alpha1.StepRunStatus) bool { //nolint:lll - if len(status.Effects) != 1 { - return false - } - eff := status.Effects[0] - return eff.Key == "effect-2" && eff.Status == "succeeded" - })).Return(nil) - - result, already, err := ExecuteEffectOnce(context.Background(), "effect-2", func(context.Context) (any, error) { - return map[string]any{"providerId": "xyz"}, nil - }) - assert.False(t, already) - assert.NoError(t, err) - assert.NotNil(t, result) - mockClient.AssertExpectations(t) -} - -func TestExecuteEffectOnce_DoesNotReexecuteAfterPatchFailure(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{ - namespace: "test-ns", - stepRun: &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - }, - }, nil - } - - patchErr := errors.New("patch failed") - claimClient := newFakeEffectClusterClient(t, "test-ns") - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - mockClient := &mockEffectPatcher{} - effectClientFactory = func() (effectPatcher, error) { return mockClient, nil } - mockClient. - On("PatchStepRunStatus", mock.Anything, "step-1", mock.Anything). - Return(patchErr). - Once() - - var calls atomic.Int32 - firstResult, firstAlready, firstErr := ExecuteEffectOnce(context.Background(), "effect-3", func(context.Context) (any, error) { //nolint:lll - calls.Add(1) - return map[string]any{"providerId": "xyz"}, nil - }) - assert.False(t, firstAlready) - assert.ErrorIs(t, firstErr, patchErr) - assert.NotNil(t, firstResult) - - var recordedClaim runsv1alpha1.EffectClaim - err := claimClient.Get(context.Background(), client.ObjectKey{ - Name: effectReservationClaimName("test-ns", "step-1", "effect-3"), - Namespace: "test-ns", - }, &recordedClaim) - assert.NoError(t, err) - assert.Equal(t, runsv1alpha1.EffectClaimCompletionStatusCompleted, effectReservationState(&recordedClaim)) - - testResetEffectEmitter() - - secondResult, secondAlready, secondErr := ExecuteEffectOnce(context.Background(), "effect-3", func(context.Context) (any, error) { //nolint:lll - calls.Add(1) - return map[string]any{"providerId": "should-not-run"}, nil - }) - assert.Nil(t, secondResult) - assert.True(t, secondAlready) - assert.ErrorIs(t, secondErr, ErrEffectAlreadyRecorded) - assert.EqualValues(t, 1, calls.Load()) - - mockClient.AssertExpectations(t) -} - -func TestEffectExecutionLeaseEvictsCompletedGateWhenRefsDropToZero(t *testing.T) { - testResetEffectEmitter() - lease, already := acquireEffectExecutionLease("ns:step:effect") - require.False(t, already) - require.NotNil(t, lease) - - releaseEffectExecutionLease(lease, true) - - effectExecMu.Lock() - defer effectExecMu.Unlock() - _, exists := effectExecGates["ns:step:effect"] - require.False(t, exists) -} - -func TestExecuteEffectOnce_ReleasesReservationAfterEffectFailure(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{ - namespace: "test-ns", - stepRun: &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - }, - }, nil - } - - claimClient := newFakeEffectClusterClient(t, "test-ns") - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - mockClient := &mockEffectPatcher{} - effectClientFactory = func() (effectPatcher, error) { return mockClient, nil } - mockClient. - On("PatchStepRunStatus", mock.Anything, "step-1", mock.Anything). - Return(nil). - Once() - - var calls atomic.Int32 - firstResult, firstAlready, firstErr := ExecuteEffectOnce(context.Background(), "effect-release", func(context.Context) (any, error) { //nolint:lll - calls.Add(1) - return nil, errors.New("boom") - }) - assert.Nil(t, firstResult) - assert.False(t, firstAlready) - assert.EqualError(t, firstErr, "boom") - - var claim runsv1alpha1.EffectClaim - err := claimClient.Get(context.Background(), client.ObjectKey{ - Name: effectReservationClaimName("test-ns", "step-1", "effect-release"), - Namespace: "test-ns", - }, &claim) - assert.NoError(t, err) - assert.Equal(t, runsv1alpha1.EffectClaimCompletionStatusReleased, claim.Spec.CompletionStatus) - assert.Empty(t, strings.TrimSpace(claim.Spec.HolderIdentity)) - - testResetEffectEmitter() - - secondResult, secondAlready, secondErr := ExecuteEffectOnce(context.Background(), "effect-release", func(context.Context) (any, error) { //nolint:lll - calls.Add(1) - return map[string]any{"providerId": "xyz"}, nil - }) - assert.NoError(t, secondErr) - assert.False(t, secondAlready) - assert.NotNil(t, secondResult) - assert.EqualValues(t, 2, calls.Load()) - - mockClient.AssertExpectations(t) -} - -func TestReserveEffect_RecoversStaleReservation(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - } - staleAt := metav1.NewMicroTime(time.Now().Add(-2 * time.Second).UTC()) - durationSeconds := int32(1) - staleClaim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName("test-ns", "step-1", "effect-stale"), - Namespace: "test-ns", - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: "step-1"}, - UID: &stepRun.UID, - }, - EffectKey: "effect-stale", - HolderIdentity: "old-holder", - AcquireTime: &staleAt, - RenewTime: &staleAt, - LeaseDurationSeconds: durationSeconds, - }, - } - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{namespace: "test-ns", stepRun: stepRun}, nil - } - claimClient := newFakeEffectClusterClient(t, "test-ns", stepRun, staleClaim) - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - - reservation, already, err := reserveEffect(context.Background(), "effect-stale") - assert.NoError(t, err) - assert.False(t, already) - if reservation == nil { - t.Fatal("expected recovered reservation") - } - - var claim runsv1alpha1.EffectClaim - err = claimClient.Get(context.Background(), client.ObjectKey{ - Name: staleClaim.Name, - Namespace: "test-ns", - }, &claim) - assert.NoError(t, err) - assert.Equal(t, runsv1alpha1.EffectClaimCompletionStatus(""), effectReservationState(&claim)) - assert.NotEqual(t, "old-holder", strings.TrimSpace(claim.Spec.HolderIdentity)) - assert.Equal(t, reservation.holderIdentity, strings.TrimSpace(claim.Spec.HolderIdentity)) - assert.False(t, effectReservationIsStale(&claim, time.Now().UTC())) -} - -func TestReserveEffect_DoesNotRecoverActiveReservation(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - } - now := metav1.NewMicroTime(time.Now().UTC()) - durationSeconds := int32(60) - activeClaim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName("test-ns", "step-1", "effect-active"), - Namespace: "test-ns", - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: "step-1"}, - UID: &stepRun.UID, - }, - EffectKey: "effect-active", - HolderIdentity: "active-holder", - AcquireTime: &now, - RenewTime: &now, - LeaseDurationSeconds: durationSeconds, - }, - } - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{namespace: "test-ns", stepRun: stepRun}, nil - } - claimClient := newFakeEffectClusterClient(t, "test-ns", stepRun, activeClaim) - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - - reservation, already, err := reserveEffect(context.Background(), "effect-active") - assert.NoError(t, err) - assert.True(t, already) - assert.Nil(t, reservation) -} - -func TestReleaseEffectReservation_DoesNotClearReservationOwnedByAnotherHolder(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - claim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName("test-ns", "step-1", "effect-guarded"), - Namespace: "test-ns", - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: "step-1"}, - }, - EffectKey: "effect-guarded", - HolderIdentity: "current-holder", - }, - } - claimClient := newFakeEffectClusterClient(t, "test-ns", claim) - - err := releaseEffectReservation(context.Background(), &effectReservation{ - client: claimClient, - claim: claim.DeepCopy(), - holderIdentity: "stale-holder", - }) - assert.NoError(t, err) - - var remaining runsv1alpha1.EffectClaim - err = claimClient.Get(context.Background(), client.ObjectKey{Name: claim.Name, Namespace: claim.Namespace}, &remaining) - assert.NoError(t, err) - assert.Equal(t, "current-holder", strings.TrimSpace(remaining.Spec.HolderIdentity)) -} - -func TestReserveEffect_OnlyOneWorkerRecoversStaleReservation(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - } - staleAt := metav1.NewMicroTime(time.Now().Add(-2 * time.Second).UTC()) - durationSeconds := int32(1) - staleClaim := &runsv1alpha1.EffectClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: effectReservationClaimName("test-ns", "step-1", "effect-race"), - Namespace: "test-ns", - }, - Spec: runsv1alpha1.EffectClaimSpec{ - StepRunRef: refs.StepRunReference{ - ObjectReference: refs.ObjectReference{Name: "step-1"}, - UID: &stepRun.UID, - }, - EffectKey: "effect-race", - HolderIdentity: "old-holder", - AcquireTime: &staleAt, - RenewTime: &staleAt, - LeaseDurationSeconds: durationSeconds, - }, - } - - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{namespace: "test-ns", stepRun: stepRun}, nil - } - claimClient := newFakeEffectClusterClient(t, "test-ns", stepRun, staleClaim) - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - - type result struct { - reservation *effectReservation - already bool - err error - } - results := make(chan result, 2) - - run := func() { - reservation, already, err := reserveEffect(context.Background(), "effect-race") - results <- result{reservation: reservation, already: already, err: err} - } - - go run() - go run() - - first := <-results - second := <-results - - successes := 0 - alreadyCount := 0 - for _, item := range []result{first, second} { - assert.NoError(t, item.err) - if item.already { - alreadyCount++ - } else { - successes++ - if item.reservation == nil { - t.Fatal("expected recovered reservation for successful worker") - } - } - } - assert.Equal(t, 1, successes) - assert.Equal(t, 1, alreadyCount) -} - -func TestExecuteEffectOnce_RenewsReservationDuringLongExecution(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - prevLeaseDuration := effectLeaseDuration - prevRenewInterval := effectLeaseRenewIntervalFunc - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - effectLeaseDuration = prevLeaseDuration - effectLeaseRenewIntervalFunc = prevRenewInterval - testResetEffectEmitter() - }) - - effectLeaseDuration = 150 * time.Millisecond - effectLeaseRenewIntervalFunc = func(time.Duration) time.Duration { return 30 * time.Millisecond } - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: "test-ns"}, - Status: runsv1alpha1.StepRunStatus{}, - } - effectReaderFactory = func() (effectReader, error) { - return &mockEffectReader{namespace: "test-ns", stepRun: stepRun}, nil - } - - claimClient := newFakeEffectClusterClient(t, "test-ns", stepRun) - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - store := newStatefulEffectStore("step-1", "test-ns") - effectClientFactory = func() (effectPatcher, error) { return &statefulEffectPatcher{store: store}, nil } - - started := make(chan struct{}) - release := make(chan struct{}) - errCh := make(chan error, 1) - go func() { - _, _, err := ExecuteEffectOnce(context.Background(), "effect-renew", func(context.Context) (any, error) { - close(started) - <-release - return map[string]any{"providerId": "renewed"}, nil - }) - errCh <- err - }() - - select { - case <-started: - case <-time.After(time.Second): - t.Fatal("expected effect execution to start") - } - - claimName := effectReservationClaimName("test-ns", "step-1", "effect-renew") - require.Eventually(t, func() bool { - var claim runsv1alpha1.EffectClaim - if err := claimClient.Get(context.Background(), client.ObjectKey{Name: claimName, Namespace: "test-ns"}, &claim); err != nil { //nolint:lll - return false - } - return claim.Spec.AcquireTime != nil && claim.Spec.RenewTime != nil && - claim.Spec.RenewTime.After(claim.Spec.AcquireTime.Time) - }, time.Second, 20*time.Millisecond, "expected claim renew time to advance while effect is running") - - reservation, already, err := reserveEffect(context.Background(), "effect-renew") - assert.NoError(t, err) - assert.True(t, already) - assert.Nil(t, reservation) - - close(release) - - select { - case err := <-errCh: - assert.NoError(t, err) - case <-time.After(time.Second): - t.Fatal("expected ExecuteEffectOnce to finish after release") - } -} - -func TestExecuteEffectOnce_DeduperKeyIncludesNamespace(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "ns-a") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - effectReaderFactory = func() (effectReader, error) { - ns := strings.TrimSpace(os.Getenv(contracts.StepRunNamespaceEnv)) - return &mockEffectReader{ - namespace: ns, - stepRun: &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: "step-1", Namespace: ns}, - Status: runsv1alpha1.StepRunStatus{}, - }, - }, nil - } - - effectClaimFactory = func() (effectClaimClient, error) { - return newFakeEffectClusterClient(t, strings.TrimSpace(os.Getenv(contracts.StepRunNamespaceEnv))), nil - } - mockClient := &mockEffectPatcher{} - effectClientFactory = func() (effectPatcher, error) { return mockClient, nil } - mockClient. - On("PatchStepRunStatus", mock.Anything, "step-1", mock.Anything). - Return(nil). - Twice() - - var calls atomic.Int32 - resultA, alreadyA, errA := ExecuteEffectOnce(context.Background(), "effect-shared", func(context.Context) (any, error) { //nolint:lll - calls.Add(1) - return map[string]any{"namespace": "ns-a"}, nil - }) - assert.NoError(t, errA) - assert.False(t, alreadyA) - assert.NotNil(t, resultA) - - t.Setenv(contracts.StepRunNamespaceEnv, "ns-b") - resultB, alreadyB, errB := ExecuteEffectOnce(context.Background(), "effect-shared", func(context.Context) (any, error) { //nolint:lll - calls.Add(1) - return map[string]any{"namespace": "ns-b"}, nil - }) - assert.NoError(t, errB) - assert.False(t, alreadyB) - assert.NotNil(t, resultB) - assert.EqualValues(t, 2, calls.Load()) - - mockClient.AssertExpectations(t) -} - -type statefulEffectStore struct { - mu sync.Mutex - stepRun runsv1alpha1.StepRun - patches int - statuses []runsv1alpha1.StepRunStatus -} - -func newStatefulEffectStore(stepRunName string, namespace string) *statefulEffectStore { - return &statefulEffectStore{ - stepRun: runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{Name: stepRunName, Namespace: namespace}, - }, - } -} - -func (s *statefulEffectStore) appendStatus(status runsv1alpha1.StepRunStatus) { - s.mu.Lock() - defer s.mu.Unlock() - s.patches++ - s.statuses = append(s.statuses, status) - s.stepRun.Status.Effects = append(s.stepRun.Status.Effects, status.Effects...) -} - -func (s *statefulEffectStore) snapshot() (int, []runsv1alpha1.EffectRecord, []runsv1alpha1.StepRunStatus) { - s.mu.Lock() - defer s.mu.Unlock() - effects := append([]runsv1alpha1.EffectRecord(nil), s.stepRun.Status.Effects...) - statuses := append([]runsv1alpha1.StepRunStatus(nil), s.statuses...) - return s.patches, effects, statuses -} - -type statefulEffectPatcher struct { - store *statefulEffectStore -} - -func (p *statefulEffectPatcher) PatchStepRunStatus(_ context.Context, _ string, status runsv1alpha1.StepRunStatus) error { //nolint:lll - p.store.appendStatus(status) - return nil -} - -type statefulEffectReader struct { - store *statefulEffectStore - namespace string -} - -func (r *statefulEffectReader) Get(_ context.Context, _ client.ObjectKey, obj client.Object, _ ...client.GetOption) error { //nolint:lll - target := obj.(*runsv1alpha1.StepRun) - r.store.mu.Lock() - defer r.store.mu.Unlock() - *target = *r.store.stepRun.DeepCopy() - return nil -} - -func (r *statefulEffectReader) List(_ context.Context, _ client.ObjectList, _ ...client.ListOption) error { - return nil -} - -func (r *statefulEffectReader) GetNamespace() string { - return r.namespace -} - -func TestExecuteEffectOnce_DeduplicatesConcurrentCalls(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(contracts.StepRunNamespaceEnv, "test-ns") - testResetEffectEmitter() - - prevReader := effectReaderFactory - prevFactory := effectClientFactory - prevClaim := effectClaimFactory - t.Cleanup(func() { - effectReaderFactory = prevReader - effectClientFactory = prevFactory - effectClaimFactory = prevClaim - testResetEffectEmitter() - }) - - store := newStatefulEffectStore("step-1", "test-ns") - effectReaderFactory = func() (effectReader, error) { - return &statefulEffectReader{store: store, namespace: "test-ns"}, nil - } - claimClient := newFakeEffectClusterClient(t, "test-ns") - effectClaimFactory = func() (effectClaimClient, error) { return claimClient, nil } - effectClientFactory = func() (effectPatcher, error) { - return &statefulEffectPatcher{store: store}, nil - } - - var calls atomic.Int32 - started := make(chan struct{}) - release := make(chan struct{}) - - type resultTuple struct { - result any - already bool - err error - } - results := make(chan resultTuple, 2) - - run := func() { - result, already, err := ExecuteEffectOnce(context.Background(), "effect-3", func(context.Context) (any, error) { - if calls.Add(1) == 1 { - close(started) - <-release - } - return map[string]any{"providerId": "xyz"}, nil - }) - results <- resultTuple{result: result, already: already, err: err} - } - - go run() - <-started - go run() - time.Sleep(20 * time.Millisecond) - close(release) - - first := <-results - second := <-results - - if calls.Load() != 1 { - t.Fatalf("expected effect function to execute once, got %d", calls.Load()) - } - patches, effects, _ := store.snapshot() - if patches != 1 { - t.Fatalf("expected one effect patch, got %d", patches) - } - if len(effects) != 1 { - t.Fatalf("expected one recorded effect, got %d", len(effects)) - } - - successes := 0 - alreadyRecorded := 0 - for _, item := range []resultTuple{first, second} { - if item.err == nil { - successes++ - if item.already { - t.Fatal("successful execution should not report already=true") - } - if item.result == nil { - t.Fatal("successful execution should return a result") - } - continue - } - if item.already && errors.Is(item.err, ErrEffectAlreadyRecorded) { - alreadyRecorded++ - continue - } - t.Fatalf("unexpected concurrent ExecuteEffectOnce result: already=%v err=%v", item.already, item.err) - } - if successes != 1 || alreadyRecorded != 1 { - t.Fatalf("expected one success and one already-recorded result, got success=%d already=%d", successes, alreadyRecorded) //nolint:lll - } -} - -func TestRecordEffect_TruncatesOversizedDetails(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(effectMaxDetailsBytesEnv, "256") - testResetEffectEmitter() - - prevFactory := effectClientFactory - store := newStatefulEffectStore("step-1", "test-ns") - effectClientFactory = func() (effectPatcher, error) { - return &statefulEffectPatcher{store: store}, nil - } - t.Cleanup(func() { - effectClientFactory = prevFactory - testResetEffectEmitter() - }) - - err := RecordEffect(context.Background(), "effect-oversized", "succeeded", map[string]any{ - "secret": string(make([]byte, 512)), - }) - assert.NoError(t, err) - - _, _, statuses := store.snapshot() - if len(statuses) != 1 { - t.Fatalf("expected one recorded status patch, got %d", len(statuses)) - } - if len(statuses[0].Effects) != 1 { - t.Fatalf("expected one effect record, got %d", len(statuses[0].Effects)) - } - if statuses[0].Effects[0].Details == nil { - t.Fatal("expected truncated effect details payload") - } - - var payload map[string]any - if err := json.Unmarshal(statuses[0].Effects[0].Details.Raw, &payload); err != nil { - t.Fatalf("unmarshal truncated effect details: %v", err) - } - if payload["truncated"] != true { - t.Fatalf("expected truncated marker, got %#v", payload) - } - if payload["type"] != "map" { //nolint:goconst - t.Fatalf("expected map type summary, got %#v", payload["type"]) - } -} diff --git a/engram/client.go b/engram/client.go index d8da4da..ea76640 100644 --- a/engram/client.go +++ b/engram/client.go @@ -1,19 +1,3 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - // package engram defines the core interfaces that developers implement to create // components for the bobrapet ecosystem. These interfaces provide a structured, // type-safe framework for building everything from simple, single-task jobs to @@ -21,23 +5,15 @@ limitations under the License. package engram import ( - "bytes" "context" - "errors" "fmt" "io/fs" "log/slog" - "mime" "os" "path/filepath" - "sort" "strings" - "sync" - "time" "github.com/bubustack/bubu-sdk-go/k8s" - "github.com/bubustack/tractatus/envelope" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" "go.opentelemetry.io/otel/trace" ) @@ -49,356 +25,75 @@ type Secrets struct { rawSecrets map[string]string } -var errInvalidSecretDescriptor = errors.New("invalid secret descriptor") - -// ErrInvalidStreamMessage reports that a StreamMessage carries an unsupported or -// ambiguous payload shape. -var ErrInvalidStreamMessage = errors.New("invalid stream message") - -// ErrSecretExpansionFailed reports that env/file secret descriptor expansion -// failed and the caller should decide whether to proceed with partial secrets. -var ErrSecretExpansionFailed = errors.New("secret expansion failed") - -const ( - defaultSecretExpansionMaxFiles = 256 - defaultSecretExpansionMaxFileBytes = 1 * 1024 * 1024 - defaultSecretExpansionMaxTotalSize = 8 * 1024 * 1024 -) - -// NewSecrets creates a new Secrets object, expanding descriptor-style env/file references. -// Callers must pass a non-nil context; cancellation stops in-flight I/O and returns the -// secrets collected so far to honor shutdown deadlines. Nil contexts are treated as API -// misuse and fail closed with an empty secret set. -func NewSecrets(ctx context.Context, rawSecrets map[string]string) *Secrets { - secrets, err := NewSecretsWithError(ctx, rawSecrets) - if err != nil && ctx == nil { - slog.Default().Warn("engram.NewSecrets requires a non-nil context; returning empty secrets") - } - if secrets == nil { - return &Secrets{rawSecrets: make(map[string]string)} - } - return secrets -} - -// NewSecretsWithError expands descriptor-style env/file references and returns any expansion -// failure to callers that want to fail closed during SDK/runtime initialization. -func NewSecretsWithError(ctx context.Context, rawSecrets map[string]string) (*Secrets, error) { - if ctx == nil { - return nil, errors.New("engram.NewSecretsWithError requires a non-nil context") - } +// NewSecrets creates a new Secrets object. This is used internally by the SDK. +func NewSecrets(rawSecrets map[string]string) *Secrets { if rawSecrets == nil { rawSecrets = make(map[string]string) } - if len(rawSecrets) == 0 { - return &Secrets{rawSecrets: rawSecrets}, nil - } - - logger := slog.Default() + // Expand descriptor-style secrets provided via env by the operator. + // Supported formats for values in rawSecrets (from BUBU_SECRET_* envs): + // - "file:" → load all files in the directory as key/value + // - "env:" → load all environment variables starting with PREFIX + // Any other value is treated as a literal and stored under its key as-is. expanded := make(map[string]string, len(rawSecrets)) - logicalNames := make([]string, 0, len(rawSecrets)) - for logicalName := range rawSecrets { - logicalNames = append(logicalNames, logicalName) - } - sort.Strings(logicalNames) - issues := make([]error, 0, len(rawSecrets)) - for _, logicalName := range logicalNames { - descriptor := rawSecrets[logicalName] - if err := ctx.Err(); err != nil { - logger.Warn("secret expansion interrupted", "error", err) - return &Secrets{rawSecrets: expanded}, err - } + for logicalName, descriptor := range rawSecrets { switch { case strings.HasPrefix(descriptor, "file:"): - descriptorSecrets := make(map[string]string) - if err := expandSecretsFromFiles(ctx, descriptor, descriptorSecrets, logger); err != nil { - if errors.Is(err, context.Canceled) { - logger.Warn("secret file expansion canceled", secretExpansionLogArgs(logicalName, descriptor, err)...) - return &Secrets{rawSecrets: expanded}, err + dirPath := strings.TrimPrefix(descriptor, "file:") + // Best-effort directory read; do not fail hard for individual files + _ = filepath.WalkDir(dirPath, func(path string, d fs.DirEntry, err error) error { + if err != nil { + // Skip unreadable entries; callers can inspect logs if needed + return nil } - logger.Warn("secret file expansion incomplete", secretExpansionLogArgs(logicalName, descriptor, err)...) - issues = append(issues, secretExpansionError(logicalName, descriptor, err)) - continue - } - mergeExpandedSecrets(expanded, descriptorSecrets, logger, "file") - case strings.HasPrefix(descriptor, "env:"): - descriptorSecrets := make(map[string]string) - if err := expandSecretsFromEnv(ctx, descriptor, descriptorSecrets, logger); err != nil { - if errors.Is(err, context.Canceled) { - logger.Warn("secret env expansion canceled", secretExpansionLogArgs(logicalName, descriptor, err)...) - return &Secrets{rawSecrets: expanded}, err + if d.IsDir() { + return nil } - logger.Warn("secret env expansion incomplete", secretExpansionLogArgs(logicalName, descriptor, err)...) - issues = append(issues, secretExpansionError(logicalName, descriptor, err)) - continue - } - mergeExpandedSecrets(expanded, descriptorSecrets, logger, "env") - default: - storeExpandedSecret(expanded, logicalName, descriptor, true, logger, secretSourceLabel("literal", logicalName)) - } - } - - if len(issues) > 0 { - return &Secrets{rawSecrets: expanded}, errors.Join(issues...) - } - return &Secrets{rawSecrets: expanded}, nil -} - -func mergeExpandedSecrets(dest map[string]string, src map[string]string, logger *slog.Logger, sourceKind string) { - if len(src) == 0 { - return - } - keys := make([]string, 0, len(src)) - for key := range src { - keys = append(keys, key) - } - sort.Strings(keys) - for _, key := range keys { - storeExpandedSecret(dest, key, src[key], false, logger, secretSourceLabel(sourceKind, key)) - } -} - -func secretExpansionError(logicalName string, descriptor string, err error) error { - return fmt.Errorf( - "%w: secret %q (%s): %w", - ErrSecretExpansionFailed, - logicalName, - secretDescriptorKind(descriptor), - sanitizeSecretLogError(err), - ) -} - -func secretExpansionLogArgs(logicalName string, descriptor string, err error) []any { - args := []any{ - "secret", logicalName, - "descriptorKind", secretDescriptorKind(descriptor), - } - if err != nil { - args = append(args, "error", sanitizeSecretLogError(err)) - } - return args -} - -func secretDescriptorKind(descriptor string) string { - switch { - case strings.HasPrefix(descriptor, "file:"): - return "file" - case strings.HasPrefix(descriptor, "env:"): - return "env" - default: - return "literal" - } -} - -func secretSourceLabel(kind string, key string) string { - key = strings.TrimSpace(key) - if key == "" { - return kind - } - return kind + ":" + key -} - -func secretPathKey(rootPath string, path string) string { - if rootPath == "" || path == "" { - return "" - } - relPath, err := filepath.Rel(rootPath, path) - if err == nil && relPath != "." { - return filepath.ToSlash(relPath) - } - return filepath.Base(path) -} - -func sanitizeSecretLogError(err error) error { - if err == nil { - return nil - } - var pathErr *fs.PathError - if errors.As(err, &pathErr) { - return fmt.Errorf("%s: %w", pathErr.Op, pathErr.Err) - } - var linkErr *os.LinkError - if errors.As(err, &linkErr) { - return fmt.Errorf("%s: %w", linkErr.Op, linkErr.Err) - } - return err -} + // Use filename as the secret key + key := filepath.Base(path) + // Read the file content + b, readErr := os.ReadFile(path) + if readErr != nil { + return nil + } + expanded[key] = strings.TrimRight(string(b), "\n") + return nil + }) -func storeExpandedSecret( - dest map[string]string, - key string, - value string, - overwrite bool, - logger *slog.Logger, - source string, -) { - key = strings.TrimSpace(key) - if key == "" { - if logger != nil { - logger.Warn("skipping secret with empty key", "source", source) - } - return - } - if _, exists := dest[key]; exists { - if !overwrite { - if logger != nil { - logger.Warn("secret key collision; keeping existing value", "key", key, "source", source) + case strings.HasPrefix(descriptor, "env:"): + prefix := strings.TrimPrefix(descriptor, "env:") + for _, env := range os.Environ() { + parts := strings.SplitN(env, "=", 2) + if len(parts) != 2 { + continue + } + name, value := parts[0], parts[1] + if strings.HasPrefix(name, prefix) { + key := strings.TrimPrefix(name, prefix) + if key == "" { + continue + } + expanded[key] = value + } } - return - } - if logger != nil { - logger.Warn("secret key collision; overriding existing value", "key", key, "source", source) - } - } - dest[key] = value -} -func expandSecretsFromFiles( - ctx context.Context, - descriptor string, - dest map[string]string, - logger *slog.Logger, -) error { - rootPath := strings.TrimSpace(strings.TrimPrefix(descriptor, "file:")) - if rootPath == "" { - return fmt.Errorf("%w: file secret path must not be empty", errInvalidSecretDescriptor) - } - rootInfo, err := os.Lstat(rootPath) - if err != nil { - return err - } - if rootInfo.Mode()&fs.ModeSymlink != 0 { - return fmt.Errorf("%w: file secret path must not be a symlink", errInvalidSecretDescriptor) - } - if !rootInfo.IsDir() && !rootInfo.Mode().IsRegular() { - return fmt.Errorf("%w: file secret path must reference a regular file or directory", errInvalidSecretDescriptor) - } - state := &secretFileExpansionState{} - if !rootInfo.IsDir() { - return expandSecretFile(ctx, rootPath, filepath.Base(rootPath), rootInfo, state, dest, logger) - } - return filepath.WalkDir(rootPath, func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - return nil - } - if d.Type()&fs.ModeSymlink != 0 { - logger.Warn("skipping symlinked secret file", "key", secretPathKey(rootPath, path)) - return nil - } - if !d.Type().IsRegular() { - logger.Warn("skipping non-regular secret file", "key", secretPathKey(rootPath, path), "mode", d.Type()) - return nil - } - info, infoErr := d.Info() - if infoErr != nil { - return infoErr - } - relPath, relErr := filepath.Rel(rootPath, path) - if relErr != nil { - return fmt.Errorf("resolve relative secret path: %w", relErr) + default: + // Literal value; store under the provided logical key + expanded[logicalName] = descriptor } - key := filepath.ToSlash(relPath) - return expandSecretFile(ctx, path, key, info, state, dest, logger) - }) -} - -type secretFileExpansionState struct { - filesSeen int - totalBytes int -} - -func expandSecretFile( - ctx context.Context, - path string, - key string, - info fs.FileInfo, - state *secretFileExpansionState, - dest map[string]string, - logger *slog.Logger, -) error { - if ctxErr := ctx.Err(); ctxErr != nil { - return ctxErr - } - if state == nil { - state = &secretFileExpansionState{} } - if state.filesSeen >= defaultSecretExpansionMaxFiles { - return fmt.Errorf("%w: secret file count exceeds max %d", errInvalidSecretDescriptor, defaultSecretExpansionMaxFiles) - } - if info != nil && info.Size() > defaultSecretExpansionMaxFileBytes { - return fmt.Errorf( - "%w: secret file %s exceeds max size %d", - errInvalidSecretDescriptor, - key, - defaultSecretExpansionMaxFileBytes, - ) - } - content, readErr := os.ReadFile(path) - if readErr != nil { - logger.Warn("failed to load secret file content", "key", key, "error", sanitizeSecretLogError(readErr)) - return readErr - } - if len(content) > defaultSecretExpansionMaxFileBytes { - return fmt.Errorf( - "%w: secret file %s exceeds max size %d", - errInvalidSecretDescriptor, - key, - defaultSecretExpansionMaxFileBytes, - ) - } - if state.totalBytes+len(content) > defaultSecretExpansionMaxTotalSize { - return fmt.Errorf( - "%w: secret file expansion exceeds max total size %d", - errInvalidSecretDescriptor, - defaultSecretExpansionMaxTotalSize, - ) - } - state.filesSeen++ - state.totalBytes += len(content) - storeExpandedSecret(dest, key, strings.TrimRight(string(content), "\n"), false, logger, secretSourceLabel("file", key)) - return nil -} -func expandSecretsFromEnv( - ctx context.Context, - descriptor string, - dest map[string]string, - logger *slog.Logger, -) error { - prefix := strings.TrimPrefix(descriptor, "env:") - if prefix == "" { - return fmt.Errorf("%w: env secret prefix must not be empty", errInvalidSecretDescriptor) - } - for _, envVar := range os.Environ() { - if err := ctx.Err(); err != nil { - return err - } - parts := strings.SplitN(envVar, "=", 2) - if len(parts) != 2 { - continue - } - name, value := parts[0], parts[1] - if !strings.HasPrefix(name, prefix) { - continue - } - key := strings.TrimPrefix(name, prefix) - if key == "" { - logger.Warn("secret env prefix matched empty key", "prefix", prefix, "envVar", name) - continue - } - storeExpandedSecret(dest, key, value, false, logger, secretSourceLabel("env", key)) + // If expansion yielded nothing (e.g., invalid descriptors), fall back to rawSecrets + if len(expanded) == 0 && len(rawSecrets) > 0 { + // Keep behavior backwards compatible + return &Secrets{rawSecrets: rawSecrets} } - return nil + return &Secrets{rawSecrets: expanded} } // Get returns a specific secret by its key. func (s *Secrets) Get(key string) (string, bool) { - if s == nil { - return "", false - } val, ok := s.rawSecrets[key] return val, ok } @@ -406,9 +101,6 @@ func (s *Secrets) Get(key string) (string, bool) { // GetAll returns a copy of the secret keys. The values are redacted // to prevent accidental logging of sensitive data. func (s *Secrets) GetAll() map[string]string { - if s == nil || len(s.rawSecrets) == 0 { - return map[string]string{} - } redacted := make(map[string]string, len(s.rawSecrets)) for k := range s.rawSecrets { redacted[k] = "[REDACTED]" @@ -416,44 +108,17 @@ func (s *Secrets) GetAll() map[string]string { return redacted } -// Names returns the available secret keys in sorted order without exposing values. -func (s *Secrets) Names() []string { - if s == nil || len(s.rawSecrets) == 0 { - return []string{} - } - names := make([]string, 0, len(s.rawSecrets)) - for name := range s.rawSecrets { - names = append(names, name) - } - sort.Strings(names) - return names -} - -// Select returns a copy of the requested plaintext secrets only. -// Missing keys are ignored so callers can safely request optional values. -func (s *Secrets) Select(keys ...string) map[string]string { - if s == nil || len(s.rawSecrets) == 0 || len(keys) == 0 { - return map[string]string{} - } - selected := make(map[string]string, len(keys)) - for _, key := range keys { - if value, ok := s.rawSecrets[key]; ok { - selected[key] = value - } - } - return selected -} - // Format implements fmt.Formatter to prevent accidental logging of secrets. // It ensures that printing the Secrets struct (e.g., with %+v) does not leak values. func (s *Secrets) Format(f fmt.State, verb rune) { _, _ = f.Write([]byte("[redacted secrets]")) } -// LogValue implements slog.LogValuer to prevent structured logs from serializing -// plaintext secret values or keys. -func (s *Secrets) LogValue() slog.Value { - return slog.StringValue("[redacted secrets]") +// Raw returns the underlying raw secrets map. This should be used with extreme +// caution and only when direct, unredacted access is absolutely necessary. +// It is the developer's responsibility to ensure these values are not logged. +func (s *Secrets) Raw() map[string]string { + return s.rawSecrets } // ExecutionContext provides metadata and utilities for a single execution of an Engram. @@ -461,29 +126,17 @@ func (s *Secrets) LogValue() slog.Value { // runtime, such as logging, tracing, and information about the current Story. // This context is passed to the `Process` method of a `BatchEngram`. type ExecutionContext struct { - logger *slog.Logger - tracer trace.Tracer - storyInfo StoryInfo - celContext map[string]any + logger *slog.Logger + tracer trace.Tracer + storyInfo StoryInfo } // NewExecutionContext is a constructor used internally by the SDK runtime. func NewExecutionContext(logger *slog.Logger, tracer trace.Tracer, storyInfo StoryInfo) *ExecutionContext { - return NewExecutionContextWithCELContext(logger, tracer, storyInfo, nil) -} - -// NewExecutionContextWithCELContext is a constructor that also attaches CEL context data. -func NewExecutionContextWithCELContext( - logger *slog.Logger, - tracer trace.Tracer, - storyInfo StoryInfo, - celContext map[string]any, -) *ExecutionContext { return &ExecutionContext{ - logger: logger, - tracer: tracer, - storyInfo: storyInfo, - celContext: cloneConfigMap(celContext), + logger: logger, + tracer: tracer, + storyInfo: storyInfo, } } @@ -520,12 +173,6 @@ func (e *ExecutionContext) Tracer() trace.Tracer { return e.tracer } -// CELContext returns a defensive copy of the CEL context map provided by the -// controller (inputs + steps) so callers cannot mutate SDK-owned runtime state. -func (e *ExecutionContext) CELContext() map[string]any { - return cloneConfigMap(e.celContext) -} - // Result is the universal return type for a BatchEngram's Process method. // It encapsulates the output data. The SDK uses this structure to determine // the output of the step. @@ -537,12 +184,6 @@ type Result struct { Data any } -// NewResultFrom wraps the provided data in a Result. It keeps examples and callers -// working with a single helper so future metadata can be attached centrally. -func NewResultFrom(data any) *Result { - return &Result{Data: data} -} - // Engram is the foundational interface for all executable components in bobrapet. // It establishes a common initialization contract. // @@ -588,266 +229,16 @@ type Impulse[C any] interface { // StreamMessage represents a single message in a bidirectional stream with metadata. // Metadata enables tracing and correlation across streaming pipeline steps. type StreamMessage struct { - // Kind declares the semantic intent of the packet (e.g., "data", "heartbeat"). - Kind string - // MessageID is an optional caller-defined identifier that assists with deduplication. - MessageID string - // Timestamp captures when the packet was produced. Zero-value timestamps are omitted. - Timestamp time.Time // Metadata contains tracing information (StoryRunID, StepName, etc.) from DataPacket. // This should be propagated through the streaming pipeline to maintain observability. Metadata map[string]string - // Payload is the JSON-encoded data to be processed. Prefer Binary for new code paths. + // Payload is the JSON-encoded data to be processed. Payload []byte - // Audio carries PCM audio frames when present. - Audio *AudioFrame - // Video carries encoded or raw video frames when present. - Video *VideoFrame - // Binary carries arbitrary non-audio/video frames when present. - Binary *BinaryFrame // Inputs contains the evaluated step 'with:' configuration (CEL-resolved per packet). - // This is analogous to BUBU_TRIGGER_DATA in batch mode - dynamic configuration that can + // This is analogous to BUBU_INPUTS in batch mode - dynamic configuration that can // reference outputs from previous steps. The Hub evaluates this before forwarding. // Empty if the step has no 'with:' block or evaluation failed. Inputs []byte - // Transports mirrors the Story's declared transports, allowing engrams to decide whether - // to keep payloads on the hot path (e.g., LiveKit) or fall back to storage without - // rereading pod environment. - Transports []TransportDescriptor - // Envelope carries optional stream sequencing metadata from the transport layer. - // When set, it can be used for ordering and replay-aware processing. - Envelope *transportpb.StreamEnvelope -} - -// InboundMessage wraps a transport-delivered StreamMessage and lets the SDK runtime -// track when user processing is complete for delivery policies that require it. -// Best-effort traffic ignores completion signals. -type InboundMessage struct { - StreamMessage - receipt *streamMessageReceipt -} - -type streamMessageReceipt struct { - once sync.Once - onProcessed func() -} - -// NewInboundMessage wraps a StreamMessage for inbound streaming delivery. -func NewInboundMessage(msg StreamMessage) InboundMessage { - return InboundMessage{StreamMessage: msg} -} - -// BindProcessingReceipt attaches an SDK-managed completion hook to an inbound -// message. Runtime code uses this to defer transport acknowledgements until user -// processing explicitly completes. External callers normally do not need it. -func BindProcessingReceipt(msg InboundMessage, onProcessed func()) InboundMessage { - if onProcessed == nil { - return msg - } - msg.receipt = &streamMessageReceipt{onProcessed: onProcessed} - return msg -} - -// Done notifies the runtime that processing of this inbound message completed -// successfully. Messages without an attached processing receipt ignore this call, -// so best-effort traffic does not require special handling. -func (m InboundMessage) Done() { - if m.receipt == nil { - return - } - m.receipt.once.Do(func() { - if m.receipt.onProcessed != nil { - m.receipt.onProcessed() - } - }) -} - -// Validate reports whether the message uses a transport shape the SDK can encode -// without dropping or reinterpreting data. -func (m StreamMessage) Validate() error { //nolint:gocyclo - kind := strings.TrimSpace(m.Kind) - if kind != m.Kind { - return fmt.Errorf("%w: stream message kind must not have surrounding whitespace", ErrInvalidStreamMessage) - } - if strings.TrimSpace(m.MessageID) != m.MessageID { - return fmt.Errorf("%w: stream message message_id must not have surrounding whitespace", ErrInvalidStreamMessage) - } - for key := range m.Metadata { - if strings.TrimSpace(key) == "" { - return fmt.Errorf("%w: stream message metadata keys must not be empty", ErrInvalidStreamMessage) - } - if strings.TrimSpace(key) != key { - return fmt.Errorf("%w: stream message metadata key %q must not have surrounding whitespace", ErrInvalidStreamMessage, key) //nolint:lll - } - } - if err := validateStreamKind(kind); err != nil { - return err - } - if err := validateReservedStreamKindUsage(m, kind); err != nil { - return err - } - frameKinds := make([]string, 0, 3) - if m.Audio != nil { - frameKinds = append(frameKinds, "audio") - } - if m.Video != nil { - frameKinds = append(frameKinds, "video") - } - if m.Binary != nil { - frameKinds = append(frameKinds, "binary") - } - if len(frameKinds) > 1 { - return fmt.Errorf("%w: multiple frame payloads set (%s)", ErrInvalidStreamMessage, strings.Join(frameKinds, ", ")) - } - if err := validateAudioFrame(m.Audio); err != nil { - return err - } - if err := validateVideoFrame(m.Video); err != nil { - return err - } - if err := validateBinaryFrame(m.Binary, m); err != nil { - return err - } - if m.Binary != nil && len(m.Payload) > 0 && !bytes.Equal(m.Payload, m.Binary.Payload) { - return fmt.Errorf("%w: payload and binary payload must match when both are set", ErrInvalidStreamMessage) - } - return nil -} - -func validateStreamKind(kind string) error { - for _, r := range kind { - switch { - case r >= 'a' && r <= 'z': - case r >= 'A' && r <= 'Z': - case r >= '0' && r <= '9': - case r == '.', r == '-', r == '_': - default: - return fmt.Errorf("%w: stream message kind %q contains unsupported characters", ErrInvalidStreamMessage, kind) - } - } - return nil -} - -func validateReservedStreamKindUsage(msg StreamMessage, kind string) error { - switch strings.ToLower(kind) { - case StreamMessageKindError: - if len(msg.Payload) == 0 { - return fmt.Errorf("%w: error messages require payload", ErrInvalidStreamMessage) - } - if msg.Audio != nil || msg.Video != nil || msg.Binary != nil { - return fmt.Errorf("%w: error messages must not carry media frames", ErrInvalidStreamMessage) - } - case StreamMessageKindHeartbeat, StreamMessageKindNoop: - if carriesReservedKindBody(msg) { - return fmt.Errorf("%w: %s messages must not carry payload, metadata, or frames", ErrInvalidStreamMessage, strings.ToLower(kind)) //nolint:lll - } - } - return nil -} - -func carriesReservedKindBody(msg StreamMessage) bool { - return len(msg.Payload) > 0 || - len(msg.Inputs) > 0 || - len(msg.Transports) > 0 || - len(msg.Metadata) > 0 || - msg.Audio != nil || - msg.Video != nil || - msg.Binary != nil || - msg.Envelope != nil || - strings.TrimSpace(msg.MessageID) != "" || - !msg.Timestamp.IsZero() -} - -func carriesStructuredEnvelopeFields(msg StreamMessage) bool { - return strings.TrimSpace(msg.Kind) != "" || - strings.TrimSpace(msg.MessageID) != "" || - !msg.Timestamp.IsZero() || - len(msg.Metadata) > 0 || - len(msg.Payload) > 0 || - len(msg.Inputs) > 0 || - len(msg.Transports) > 0 || - msg.Envelope != nil -} - -func validateAudioFrame(audio *AudioFrame) error { - if audio == nil { - return nil - } - if strings.TrimSpace(audio.Codec) != audio.Codec { - return fmt.Errorf("%w: audio frame codec must not have surrounding whitespace", ErrInvalidStreamMessage) - } - if len(audio.PCM) == 0 { - return fmt.Errorf("%w: audio frame missing pcm payload", ErrInvalidStreamMessage) - } - if audio.SampleRateHz <= 0 { - return fmt.Errorf("%w: audio frame sample rate must be positive", ErrInvalidStreamMessage) - } - if audio.Channels <= 0 { - return fmt.Errorf("%w: audio frame channels must be positive", ErrInvalidStreamMessage) - } - if audio.Timestamp < 0 { - return fmt.Errorf("%w: audio frame timestamp must not be negative", ErrInvalidStreamMessage) - } - return nil -} - -func validateVideoFrame(video *VideoFrame) error { - if video == nil { - return nil - } - if strings.TrimSpace(video.Codec) != video.Codec { - return fmt.Errorf("%w: video frame codec must not have surrounding whitespace", ErrInvalidStreamMessage) - } - if len(video.Payload) == 0 { - return fmt.Errorf("%w: video frame missing payload", ErrInvalidStreamMessage) - } - if !video.Raw && strings.TrimSpace(video.Codec) == "" { - return fmt.Errorf("%w: encoded video frame requires codec", ErrInvalidStreamMessage) - } - if video.Raw && (video.Width == 0 || video.Height == 0) { - return fmt.Errorf("%w: raw video frame requires width and height", ErrInvalidStreamMessage) - } - if video.Timestamp < 0 { - return fmt.Errorf("%w: video frame timestamp must not be negative", ErrInvalidStreamMessage) - } - return nil -} - -func validateBinaryFrame(binary *BinaryFrame, msg StreamMessage) error { - if binary == nil { - return nil - } - if len(binary.Payload) == 0 { - return fmt.Errorf("%w: binary frame missing payload", ErrInvalidStreamMessage) - } - if binary.Timestamp < 0 { - return fmt.Errorf("%w: binary frame timestamp must not be negative", ErrInvalidStreamMessage) - } - mimeType := strings.TrimSpace(binary.MimeType) - if mimeType != binary.MimeType { - return fmt.Errorf("%w: binary frame mime type must not have surrounding whitespace", ErrInvalidStreamMessage) - } - if mimeType != "" { - parsedMediaType, _, err := mime.ParseMediaType(mimeType) - if err != nil { - return fmt.Errorf("%w: binary frame mime type %q is invalid", ErrInvalidStreamMessage, mimeType) - } - mimeType = strings.ToLower(parsedMediaType) - } - if mimeType == envelope.MIMEType && !carriesStructuredEnvelopeFields(msg) { - return fmt.Errorf( - "%w: binary frame MIME type %q is reserved for envelope payloads", - ErrInvalidStreamMessage, - envelope.MIMEType, - ) - } - if mimeType == envelope.MIMEType && !bytes.Equal(binary.Payload, msg.Payload) { - return fmt.Errorf( - "%w: reserved envelope MIME type requires binary payload to mirror the structured payload", - ErrInvalidStreamMessage, - ) - } - return nil } // StreamingEngram is the interface for components that handle real-time, @@ -857,10 +248,9 @@ func validateBinaryFrame(binary *BinaryFrame, msg StreamMessage) error { type StreamingEngram[C any] interface { Engram[C] // Stream is the core method for handling bidirectional data flow with metadata. - // The SDK provides inbound messages plus an outbound StreamMessage channel. - // Metadata should be propagated to enable tracing across the streaming pipeline. - // The method should process messages from `in`, call Done on messages it handled - // successfully (or intentionally dropped), and write results to `out` until the - // input channel is closed or the context is canceled. - Stream(ctx context.Context, in <-chan InboundMessage, out chan<- StreamMessage) error + // The SDK provides channels for receiving and sending StreamMessage which includes + // both payload and metadata. Metadata should be propagated to enable tracing across + // the streaming pipeline. The method should process messages from `in` and write + // results to `out` until the input channel is closed or the context is canceled. + Stream(ctx context.Context, in <-chan StreamMessage, out chan<- StreamMessage) error } diff --git a/engram/client_test.go b/engram/client_test.go index f1cdd37..29e4908 100644 --- a/engram/client_test.go +++ b/engram/client_test.go @@ -1,40 +1,17 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package engram import ( - "bytes" "context" - "errors" "fmt" "log/slog" "os" "path/filepath" - "strings" "testing" - "time" - "github.com/bubustack/bubu-sdk-go/pkg/observability" - "github.com/bubustack/tractatus/envelope" "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel" ) -const testSecretValue = "value" - func TestSecrets_Get(t *testing.T) { tests := []struct { name string @@ -72,7 +49,7 @@ func TestSecrets_Get(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s := NewSecrets(context.Background(), tt.secrets) + s := NewSecrets(tt.secrets) gotValue, gotExists := s.Get(tt.key) if gotValue != tt.wantValue { @@ -91,7 +68,7 @@ func TestSecrets_GetAll(t *testing.T) { "KEY2": "value2", } - s := NewSecrets(context.Background(), secrets) + s := NewSecrets(secrets) got := s.GetAll() if len(got) != len(secrets) { @@ -106,46 +83,6 @@ func TestSecrets_GetAll(t *testing.T) { } } -func TestSecrets_NamesReturnsSortedCopy(t *testing.T) { - s := NewSecrets(context.Background(), map[string]string{ - "zeta": "3", - "alpha": "1", - "beta": "2", - }) - - names := s.Names() - assert.Equal(t, []string{"alpha", "beta", "zeta"}, names) - - names[0] = "mutated" - assert.Equal(t, []string{"alpha", "beta", "zeta"}, s.Names()) -} - -func TestSecrets_SelectReturnsRequestedValuesOnly(t *testing.T) { - s := NewSecrets(context.Background(), map[string]string{ - "API_KEY": "secret123", - "TOKEN": "token456", - }) - - selected := s.Select("TOKEN", "MISSING") - assert.Equal(t, map[string]string{"TOKEN": "token456"}, selected) - - selected["TOKEN"] = "changed" //nolint:goconst - value, ok := s.Get("TOKEN") - assert.True(t, ok) - assert.Equal(t, "token456", value) -} - -func TestSecrets_AccessorsHandleNilReceiver(t *testing.T) { - var s *Secrets - - value, ok := s.Get("missing") - assert.False(t, ok) - assert.Empty(t, value) - assert.Empty(t, s.GetAll()) - assert.Empty(t, s.Names()) - assert.Empty(t, s.Select("missing")) -} - func TestNewSecrets_EnvPrefixExpansion(t *testing.T) { err := os.Setenv("PAY_apiKey", "abc") if err != nil { @@ -166,7 +103,7 @@ func TestNewSecrets_EnvPrefixExpansion(t *testing.T) { } }() - s := NewSecrets(context.Background(), map[string]string{"payments": "env:PAY_"}) + s := NewSecrets(map[string]string{"payments": "env:PAY_"}) if v, ok := s.Get("apiKey"); !ok || v != "abc" { t.Fatalf("env expansion failed for apiKey: ok=%v v=%q", ok, v) } @@ -184,7 +121,7 @@ func TestNewSecrets_FileDirExpansion(t *testing.T) { t.Fatal(err) } - s := NewSecrets(context.Background(), map[string]string{"db": "file:" + dir}) + s := NewSecrets(map[string]string{"db": "file:" + dir}) if v, ok := s.Get("username"); !ok || v != "alice" { t.Fatalf("file expansion failed for username: ok=%v v=%q", ok, v) } @@ -193,255 +130,14 @@ func TestNewSecrets_FileDirExpansion(t *testing.T) { } } -func TestNewSecrets_FileDirExpansionPreservesRelativePathsForNestedFiles(t *testing.T) { - dir := t.TempDir() - if err := os.MkdirAll(filepath.Join(dir, "db", "writer"), 0o755); err != nil { - t.Fatal(err) - } - if err := os.MkdirAll(filepath.Join(dir, "db", "reader"), 0o755); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(filepath.Join(dir, "db", "writer", "password"), []byte("writer-secret"), 0o600); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(filepath.Join(dir, "db", "reader", "password"), []byte("reader-secret"), 0o600); err != nil { - t.Fatal(err) - } - - s := NewSecrets(context.Background(), map[string]string{"db": "file:" + dir}) - if v, ok := s.Get("db/writer/password"); !ok || v != "writer-secret" { - t.Fatalf("nested file expansion failed for db/writer/password: ok=%v v=%q", ok, v) - } - if v, ok := s.Get("db/reader/password"); !ok || v != "reader-secret" { - t.Fatalf("nested file expansion failed for db/reader/password: ok=%v v=%q", ok, v) - } - if _, ok := s.Get("password"); ok { - t.Fatal("nested file expansion must not collapse duplicate basenames") - } -} - -func TestNewSecrets_FileDirExpansionRejectsSymlinkedFiles(t *testing.T) { - dir := t.TempDir() - target := filepath.Join(t.TempDir(), "outside-token") - if err := os.WriteFile(target, []byte("top-secret"), 0o600); err != nil { - t.Fatal(err) - } - link := filepath.Join(dir, "token") - if err := os.Symlink(target, link); err != nil { - t.Skipf("symlink creation unavailable: %v", err) - } - - s := NewSecrets(context.Background(), map[string]string{"db": "file:" + dir}) - if _, ok := s.Get("token"); ok { - t.Fatal("symlinked files must not be imported as secrets") - } -} - -func TestNewSecrets_FileDirExpansionRejectsOversizedFiles(t *testing.T) { - dir := t.TempDir() - oversized := strings.Repeat("x", defaultSecretExpansionMaxFileBytes+1) - if err := os.WriteFile(filepath.Join(dir, "huge"), []byte(oversized), 0o600); err != nil { - t.Fatal(err) - } - - s := NewSecrets(context.Background(), map[string]string{"db": "file:" + dir}) - if _, ok := s.Get("huge"); ok { - t.Fatal("oversized secret files must not be imported") - } -} - -func TestNewSecrets_FileDirExpansionRejectsTooManyFiles(t *testing.T) { - dir := t.TempDir() - for i := range defaultSecretExpansionMaxFiles + 1 { - name := filepath.Join(dir, fmt.Sprintf("secret-%03d", i)) - if err := os.WriteFile(name, []byte(testSecretValue), 0o600); err != nil { - t.Fatal(err) - } - } - - s := NewSecrets(context.Background(), map[string]string{"db": "file:" + dir}) - names := s.Names() - if len(names) != 0 { - t.Fatalf( - "expected descriptor to fail closed when file count exceeds %d, got %d imported secrets", - defaultSecretExpansionMaxFiles, - len(names), - ) - } -} - -func TestNewSecrets_DoesNotFallbackToRawDescriptorOnFileExpansionFailure(t *testing.T) { - missingDir := filepath.Join(t.TempDir(), "missing") - s := NewSecrets(context.Background(), map[string]string{"db": "file:" + missingDir}) - - if _, ok := s.Get("db"); ok { - t.Fatal("failed file expansion must not expose the raw descriptor as a secret value") - } -} - -func TestNewSecrets_RedactsDescriptorInWarningLogs(t *testing.T) { - var buf bytes.Buffer - prev := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - defer slog.SetDefault(prev) - - missingDir := filepath.Join(t.TempDir(), "missing") - NewSecrets(context.Background(), map[string]string{ - "db": "file:" + missingDir, - "badEnv": "env:", - }) - - output := buf.String() - if strings.Contains(output, missingDir) { - t.Fatalf("warning logs must not include raw secret paths: %s", output) - } - if strings.Contains(output, "file:"+missingDir) { - t.Fatalf("warning logs must not include raw file descriptors: %s", output) - } - if strings.Contains(output, "\"descriptor\":") { - t.Fatalf("warning logs must not include a raw descriptor field: %s", output) - } - if !strings.Contains(output, "\"secret\":\"db\"") || !strings.Contains(output, "\"descriptorKind\":\"file\"") { - t.Fatalf("warning logs should retain the logical secret name and kind for file failures: %s", output) - } - if !strings.Contains(output, "\"secret\":\"badEnv\"") || !strings.Contains(output, "\"descriptorKind\":\"env\"") { - t.Fatalf("warning logs should retain the logical secret name and kind for env failures: %s", output) - } -} - -func TestNewSecretsWithErrorReturnsPartialSecretsAndRedactedErrors(t *testing.T) { - missingDir := filepath.Join(t.TempDir(), "missing") - - secrets, err := NewSecretsWithError(context.Background(), map[string]string{ - "literal": testSecretValue, - "db": "file:" + missingDir, - "badEnv": "env:", - }) - if err == nil { - t.Fatal("expected secret expansion error") - } - if !errors.Is(err, ErrSecretExpansionFailed) { - t.Fatalf("expected ErrSecretExpansionFailed, got %v", err) - } - if secrets == nil { - t.Fatal("expected partial secrets to be returned") - } - if v, ok := secrets.Get("literal"); !ok || v != testSecretValue { - t.Fatalf("expected literal secret to be preserved, ok=%v v=%q", ok, v) - } - if _, ok := secrets.Get("db"); ok { - t.Fatal("failed file expansion must not expose the raw descriptor") - } - if _, ok := secrets.Get("badEnv"); ok { - t.Fatal("failed env expansion must not expose the raw descriptor") - } - msg := err.Error() - if !strings.Contains(msg, `secret "db" (file)`) { - t.Fatalf("expected file secret name in error, got: %s", msg) - } - if !strings.Contains(msg, `secret "badEnv" (env)`) { - t.Fatalf("expected env secret name in error, got: %s", msg) - } - if strings.Contains(msg, missingDir) { - t.Fatalf("error must not leak raw secret path: %s", msg) - } - if strings.Contains(msg, "file:"+missingDir) { - t.Fatalf("error must not leak raw file descriptor: %s", msg) - } - if strings.Contains(msg, "env:") { - t.Fatalf("error must not leak raw env descriptor: %s", msg) - } -} - -func TestNewSecretsWithErrorRejectsNilContext(t *testing.T) { - _, err := NewSecretsWithError(nil, map[string]string{"key": testSecretValue}) //nolint:staticcheck - if err == nil { - t.Fatal("expected nil context to return an error") - } -} - -func TestNewSecretsNilContextFailsClosed(t *testing.T) { - secrets := NewSecrets(nil, map[string]string{"literal": testSecretValue}) //nolint:staticcheck - if secrets == nil { - t.Fatal("expected empty secrets, got nil") - } - if _, ok := secrets.Get("literal"); ok { - t.Fatal("expected nil-context NewSecrets call to fail closed") - } - if got := secrets.GetAll(); len(got) != 0 { - t.Fatalf("expected no secrets after nil-context failure, got %v", got) - } -} - -func TestNewSecretsWithErrorFailsClosedForUnreadableDirectoryEntries(t *testing.T) { - dir := t.TempDir() - goodPath := filepath.Join(dir, "good.txt") - badPath := filepath.Join(dir, "bad.txt") - if err := os.WriteFile(goodPath, []byte("good"), 0o600); err != nil { - t.Fatalf("write good secret: %v", err) - } - if err := os.WriteFile(badPath, []byte("bad"), 0o600); err != nil { - t.Fatalf("write unreadable secret: %v", err) - } - if err := os.Chmod(badPath, 0o000); err != nil { - t.Fatalf("chmod unreadable secret: %v", err) - } - t.Cleanup(func() { - _ = os.Chmod(badPath, 0o600) - }) - if _, err := os.ReadFile(badPath); err == nil { - t.Skip("filesystem permissions still allow reading chmod 000 file") - } - - secrets, err := NewSecretsWithError(context.Background(), map[string]string{ - "literal": "value", - "dir": "file:" + dir, - }) - if err == nil { - t.Fatal("expected unreadable directory entry error") - } - if !errors.Is(err, ErrSecretExpansionFailed) { - t.Fatalf("expected ErrSecretExpansionFailed, got %v", err) - } - if secrets == nil { - t.Fatal("expected partial secrets for unrelated descriptors") - } - if got, ok := secrets.Get("literal"); !ok || got != "value" { - t.Fatalf("expected literal secret to survive, ok=%v got=%q", ok, got) - } - if _, ok := secrets.Get("good.txt"); ok { - t.Fatal("expected directory-backed secrets to fail closed when any entry is unreadable") - } -} - func TestSecrets_Format(t *testing.T) { - s := NewSecrets(context.Background(), map[string]string{"key": "value"}) + s := NewSecrets(map[string]string{"key": "value"}) output := fmt.Sprintf("%v", s) assert.Equal(t, "[redacted secrets]", output) } -func TestSecrets_LogValueRedactsStructuredLogging(t *testing.T) { - var buf bytes.Buffer - logger := slog.New(slog.NewJSONHandler(&buf, nil)) - secrets := NewSecrets(context.Background(), map[string]string{"apiKey": "secret-value"}) - - logger.Info("testing secrets", "secrets", secrets) - - output := buf.String() - if strings.Contains(output, "secret-value") { - t.Fatalf("structured logging must not include secret values: %s", output) - } - if strings.Contains(output, "apiKey") { - t.Fatalf("structured logging must not include secret keys by default: %s", output) - } - if !strings.Contains(output, "[redacted secrets]") { - t.Fatalf("structured logging should emit the redacted sentinel, got: %s", output) - } -} - func TestNewSecrets_NilInput(t *testing.T) { - s := NewSecrets(context.Background(), nil) + s := NewSecrets(nil) if s == nil { t.Fatal("NewSecrets(nil) returned nil") @@ -457,64 +153,9 @@ func TestNewSecrets_NilInput(t *testing.T) { } } -func TestNewSecrets_ContextCancellationStopsExpansion(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - s := NewSecrets(ctx, map[string]string{"literal": "value"}) - if _, ok := s.Get("literal"); ok { - t.Fatalf("expected literal secrets to be skipped when context is canceled") - } -} - -func TestNewSecrets_EmptyEnvPrefixDoesNotExpandWholeEnvironment(t *testing.T) { - if err := os.Setenv("BUBU_TEST_SECRET_ONE", "one"); err != nil { - t.Fatal(err) - } - if err := os.Setenv("BUBU_TEST_SECRET_TWO", "two"); err != nil { - t.Fatal(err) - } - defer func() { - _ = os.Unsetenv("BUBU_TEST_SECRET_ONE") - _ = os.Unsetenv("BUBU_TEST_SECRET_TWO") - }() - - s := NewSecrets(context.Background(), map[string]string{ - "literal": "value", - "bad": "env:", - }) - - if v, ok := s.Get("literal"); !ok || v != "value" { - t.Fatalf("literal secret should remain available, ok=%v v=%q", ok, v) - } - if _, ok := s.Get("BUBU_TEST_SECRET_ONE"); ok { - t.Fatal("empty env prefix must not import the full process environment") - } - if _, ok := s.Get("BUBU_TEST_SECRET_TWO"); ok { - t.Fatal("empty env prefix must not import the full process environment") - } -} - -func TestNewSecrets_LiteralSecretsOverrideExpandedCollisions(t *testing.T) { - if err := os.Setenv("PAY_token", "expanded-token"); err != nil { - t.Fatal(err) - } - defer func() { - _ = os.Unsetenv("PAY_token") - }() - - s := NewSecrets(context.Background(), map[string]string{ - "payments": "env:PAY_", - "token": "literal-token", - }) - if v, ok := s.Get("token"); !ok || v != "literal-token" { - t.Fatalf("literal secret should override expansion collisions, ok=%v v=%q", ok, v) - } -} - func TestNewExecutionContext(t *testing.T) { logger := slog.New(slog.NewJSONHandler(os.Stdout, nil)) - tracer := observability.Tracer("test") + tracer := otel.Tracer("test") storyInfo := StoryInfo{ StoryName: "test-story", StoryRunID: "run-123", @@ -551,475 +192,6 @@ func TestNewExecutionContext(t *testing.T) { } } -func TestNewExecutionContextWithCELContextClonesInput(t *testing.T) { - celContext := map[string]any{ - "inputs": map[string]any{ - "message": "original", - }, - "steps": []any{ - map[string]any{"name": "first"}, - }, - } - - ec := NewExecutionContextWithCELContext(nil, nil, StoryInfo{}, celContext) - - inputs := celContext["inputs"].(map[string]any) - inputs["message"] = "mutated" - steps := celContext["steps"].([]any) - steps[0].(map[string]any)["name"] = "changed" - - got := ec.CELContext() - if got["inputs"].(map[string]any)["message"] != "original" { - t.Fatalf("constructor must isolate CEL context from caller mutation, got %v", got["inputs"]) - } - if got["steps"].([]any)[0].(map[string]any)["name"] != "first" { - t.Fatalf("constructor must deep copy nested CEL context values, got %v", got["steps"]) - } -} - -func TestExecutionContextCELContextReturnsDefensiveCopy(t *testing.T) { - ec := NewExecutionContextWithCELContext(nil, nil, StoryInfo{}, map[string]any{ - "inputs": map[string]any{ - "message": "original", - }, - "steps": []any{ - map[string]any{"name": "first"}, - }, - }) - - first := ec.CELContext() - first["inputs"].(map[string]any)["message"] = "mutated" - first["steps"].([]any)[0].(map[string]any)["name"] = "changed" - - second := ec.CELContext() - if second["inputs"].(map[string]any)["message"] != "original" { - t.Fatalf("CELContext must return a defensive copy, got %v", second["inputs"]) - } - if second["steps"].([]any)[0].(map[string]any)["name"] != "first" { - t.Fatalf("CELContext must deep copy nested values, got %v", second["steps"]) - } -} - -func TestStreamMessageValidateRejectsMultipleFrameTypes(t *testing.T) { - msg := StreamMessage{ - Audio: &AudioFrame{PCM: []byte{0x01}}, - Video: &VideoFrame{Payload: []byte{0x02}, Codec: "vp8"}, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "audio, video") { - t.Fatalf("expected frame names in validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsBinaryPayloadMismatch(t *testing.T) { - msg := StreamMessage{ - Payload: []byte(`{"ok":true}`), - Binary: &BinaryFrame{ - Payload: []byte("raw"), - MimeType: "application/octet-stream", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "payload and binary payload must match") { - t.Fatalf("expected binary mismatch validation error, got %v", err) - } -} - -func TestStreamMessageValidateAllowsBinaryPayloadMirror(t *testing.T) { - msg := StreamMessage{ - Payload: []byte("raw"), - Binary: &BinaryFrame{ - Payload: []byte("raw"), - MimeType: "application/octet-stream", - }, - } - - if err := msg.Validate(); err != nil { - t.Fatalf("expected payload-backed binary mirror to remain valid, got %v", err) - } -} - -func TestStreamMessageValidateRejectsKindWithSurroundingWhitespace(t *testing.T) { - msg := StreamMessage{ - Kind: " telemetry ", - Payload: []byte(`{"ok":true}`), - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "must not have surrounding whitespace") { - t.Fatalf("expected kind whitespace validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsMessageIDWithSurroundingWhitespace(t *testing.T) { - msg := StreamMessage{ - MessageID: " msg-1 ", - Payload: []byte(`{"ok":true}`), - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "message_id must not have surrounding whitespace") { - t.Fatalf("expected message_id whitespace validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsEmptyMetadataKey(t *testing.T) { - msg := StreamMessage{ - Payload: []byte(`{"ok":true}`), - Metadata: map[string]string{"": "value"}, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "metadata keys must not be empty") { - t.Fatalf("expected empty metadata key validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsMetadataKeyWithSurroundingWhitespace(t *testing.T) { - msg := StreamMessage{ - Payload: []byte(`{"ok":true}`), - Metadata: map[string]string{" trace-id ": "abc"}, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "metadata key") || !strings.Contains(err.Error(), "surrounding whitespace") { - t.Fatalf("expected metadata key whitespace validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsErrorKindWithoutPayload(t *testing.T) { - msg := StreamMessage{Kind: StreamMessageKindError} - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "error messages require payload") { - t.Fatalf("expected error-kind payload validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsHeartbeatWithPayload(t *testing.T) { - msg := StreamMessage{ - Kind: StreamMessageKindHeartbeat, - Payload: []byte(`{"ok":true}`), - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "heartbeat messages must not carry payload, metadata, or frames") { - t.Fatalf("expected heartbeat validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsAudioWithoutPCM(t *testing.T) { - msg := StreamMessage{ - Audio: &AudioFrame{ - SampleRateHz: 16000, - Channels: 1, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "audio frame missing pcm payload") { - t.Fatalf("expected audio payload validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsAudioCodecWithSurroundingWhitespace(t *testing.T) { - msg := StreamMessage{ - Audio: &AudioFrame{ - PCM: []byte{0x01}, - SampleRateHz: 16000, - Channels: 1, - Codec: " pcm16 ", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "audio frame codec must not have surrounding whitespace") { - t.Fatalf("expected audio codec whitespace validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsAudioWithoutSampleRateOrChannels(t *testing.T) { - msg := StreamMessage{ - Audio: &AudioFrame{ - PCM: []byte{0x01}, - SampleRateHz: 0, - Channels: 0, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "sample rate must be positive") { - t.Fatalf("expected audio sample-rate validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsRawVideoWithoutDimensions(t *testing.T) { - msg := StreamMessage{ - Video: &VideoFrame{ - Payload: []byte{0x02}, - Raw: true, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "raw video frame requires width and height") { - t.Fatalf("expected raw video dimension validation error, got %v", err) - } -} - -func TestStreamMessageValidateAllowsEncodedVideoWithoutDimensions(t *testing.T) { - msg := StreamMessage{ - Video: &VideoFrame{ - Payload: []byte{0x02}, - Codec: "vp8", - }, - } - - if err := msg.Validate(); err != nil { - t.Fatalf("expected encoded video without explicit dimensions to remain valid, got %v", err) - } -} - -func TestStreamMessageValidateRejectsEncodedVideoWithoutCodec(t *testing.T) { - msg := StreamMessage{ - Video: &VideoFrame{ - Payload: []byte{0x02}, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "encoded video frame requires codec") { - t.Fatalf("expected encoded video codec validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsVideoCodecWithSurroundingWhitespace(t *testing.T) { - msg := StreamMessage{ - Video: &VideoFrame{ - Payload: []byte{0x02}, - Codec: " vp8 ", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "video frame codec must not have surrounding whitespace") { - t.Fatalf("expected video codec whitespace validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsNegativeBinaryTimestamp(t *testing.T) { - msg := StreamMessage{ - Binary: &BinaryFrame{ - Payload: []byte{0x01}, - MimeType: "application/octet-stream", - Timestamp: -1 * time.Millisecond, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "binary frame timestamp must not be negative") { - t.Fatalf("expected negative binary timestamp validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsBinaryWithoutPayload(t *testing.T) { - msg := StreamMessage{ - Binary: &BinaryFrame{ - MimeType: "application/octet-stream", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "binary frame missing payload") { - t.Fatalf("expected missing binary payload validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsBinaryMimeTypeWithSurroundingWhitespace(t *testing.T) { - msg := StreamMessage{ - Binary: &BinaryFrame{ - Payload: []byte{0x01}, - MimeType: " application/octet-stream ", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "binary frame mime type must not have surrounding whitespace") { - t.Fatalf("expected binary MIME whitespace validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsInvalidBinaryMimeType(t *testing.T) { - msg := StreamMessage{ - Binary: &BinaryFrame{ - Payload: []byte{0x01}, - MimeType: "not a mime type", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "mime type") || !strings.Contains(err.Error(), "invalid") { - t.Fatalf("expected invalid MIME validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsReservedEnvelopeMimeWithoutEnvelopeFields(t *testing.T) { - msg := StreamMessage{ - Binary: &BinaryFrame{ - Payload: []byte("raw"), - MimeType: envelope.MIMEType, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "reserved for envelope payloads") { - t.Fatalf("expected reserved envelope MIME validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsReservedEnvelopeMimePayloadMismatch(t *testing.T) { - msg := StreamMessage{ - Kind: "telemetry", - Binary: &BinaryFrame{ - Payload: []byte("raw"), - MimeType: envelope.MIMEType, - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "mirror the structured payload") { - t.Fatalf("expected reserved envelope mirror validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsReservedEnvelopeMimeWithParametersWithoutEnvelopeFields(t *testing.T) { - msg := StreamMessage{ - Binary: &BinaryFrame{ - Payload: []byte(`{"ok":true}`), - MimeType: envelope.MIMEType + "; charset=utf-8", - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "reserved for envelope payloads") { - t.Fatalf("expected reserved envelope MIME validation error, got %v", err) - } -} - -func TestStreamMessageValidateRejectsReservedEnvelopeMimeCaseInsensitivePayloadMismatch(t *testing.T) { - msg := StreamMessage{ - Kind: "telemetry", - Payload: []byte(`{"ok":true}`), - Binary: &BinaryFrame{ - Payload: []byte(`{"ok":false}`), - MimeType: strings.ToUpper(envelope.MIMEType), - }, - } - - err := msg.Validate() - if !errors.Is(err, ErrInvalidStreamMessage) { - t.Fatalf("expected ErrInvalidStreamMessage, got %v", err) - } - if !strings.Contains(err.Error(), "mirror the structured payload") { - t.Fatalf("expected reserved envelope mirror validation error, got %v", err) - } -} - -func TestStreamMessageValidateAllowsReservedEnvelopeMimeWithParametersPayloadMirror(t *testing.T) { - msg := StreamMessage{ - Kind: "telemetry", - Payload: []byte(`{"ok":true}`), - Binary: &BinaryFrame{ - Payload: []byte(`{"ok":true}`), - MimeType: envelope.MIMEType + "; charset=utf-8", - }, - } - - if err := msg.Validate(); err != nil { - t.Fatalf("expected mirrored reserved envelope payload with parameters to remain valid, got %v", err) - } -} - -func TestStreamMessageValidateAllowsReservedEnvelopeMimePayloadMirror(t *testing.T) { - msg := StreamMessage{ - Kind: "telemetry", - Payload: []byte(`{"ok":true}`), - Binary: &BinaryFrame{ - Payload: []byte(`{"ok":true}`), - MimeType: envelope.MIMEType, - }, - } - - if err := msg.Validate(); err != nil { - t.Fatalf("expected mirrored reserved envelope payload to remain valid, got %v", err) - } -} - func TestResult(t *testing.T) { tests := []struct { name string @@ -1051,13 +223,6 @@ func TestResult(t *testing.T) { } } -func TestNewResultFrom(t *testing.T) { - payload := map[string]any{"message": "ok"} - got := NewResultFrom(payload) - assert.NotNil(t, got) - assert.Equal(t, payload, got.Data) -} - // Test that interfaces can be satisfied (compile-time check) type testBatchEngram struct{} @@ -1075,7 +240,7 @@ func (t *testStreamingEngram) Init(ctx context.Context, config string, secrets * return nil } -func (t *testStreamingEngram) Stream(ctx context.Context, in <-chan InboundMessage, out chan<- StreamMessage) error { +func (t *testStreamingEngram) Stream(ctx context.Context, in <-chan StreamMessage, out chan<- StreamMessage) error { return nil } diff --git a/engram/control.go b/engram/control.go deleted file mode 100644 index 83dad45..0000000 --- a/engram/control.go +++ /dev/null @@ -1,20 +0,0 @@ -package engram - -import "context" - -// ControlDirective represents a control-plane instruction flowing over the transport connector. -// Typical directive types include "start", "stop", or "codec-select". -type ControlDirective struct { - // Type identifies the control instruction, such as "start", "stop", or "codec-select". - Type string - // Metadata carries optional directive-specific key/value hints from the connector. - Metadata map[string]string -} - -// ControlDirectiveHandler can be implemented by StreamingEngrams that want to react to -// transport control directives emitted by connectors. -type ControlDirectiveHandler interface { - // HandleControlDirective processes an inbound directive. Returning a non-nil directive - // sends a response back to the connector. Implementations may return nil to skip replies. - HandleControlDirective(ctx context.Context, directive ControlDirective) (*ControlDirective, error) -} diff --git a/engram/idempotency.go b/engram/idempotency.go deleted file mode 100644 index 97350c0..0000000 --- a/engram/idempotency.go +++ /dev/null @@ -1,48 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package engram - -import ( - "errors" - "fmt" - "strings" -) - -// ErrIdempotencyUnavailable indicates that the execution context lacks stable identifiers. -var ErrIdempotencyUnavailable = errors.New("idempotency unavailable: missing storyrun or steprun identity") - -// IdempotencyKey derives a stable key from the StoryRun and StepRun identity. -func IdempotencyKey(info StoryInfo) (string, error) { - storyRun := strings.TrimSpace(info.StoryRunID) - stepRun := strings.TrimSpace(info.StepRunID) - if storyRun == "" || stepRun == "" { - return "", ErrIdempotencyUnavailable - } - namespace := strings.TrimSpace(info.StepRunNamespace) - if namespace == "" { - return fmt.Sprintf("storyrun/%s/steprun/%s", storyRun, stepRun), nil - } - return fmt.Sprintf("ns/%s/storyrun/%s/steprun/%s", namespace, storyRun, stepRun), nil -} - -// IdempotencyKey returns a stable key for the current execution context. -func (e *ExecutionContext) IdempotencyKey() (string, error) { - if e == nil { - return "", ErrIdempotencyUnavailable - } - return IdempotencyKey(e.storyInfo) -} diff --git a/engram/idempotency_test.go b/engram/idempotency_test.go deleted file mode 100644 index 866b1ed..0000000 --- a/engram/idempotency_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package engram - -import "testing" - -func TestIdempotencyKey(t *testing.T) { - info := StoryInfo{ - StoryRunID: "story-1", - StepRunID: "step-1", - StepRunNamespace: "default", - } - got, err := IdempotencyKey(info) - if err != nil { - t.Fatalf("IdempotencyKey() error = %v", err) - } - if got != "ns/default/storyrun/story-1/steprun/step-1" { - t.Fatalf("IdempotencyKey() = %q", got) - } - - info.StepRunNamespace = "" - got, err = IdempotencyKey(info) - if err != nil { - t.Fatalf("IdempotencyKey() without namespace error = %v", err) - } - if got != "storyrun/story-1/steprun/step-1" { - t.Fatalf("IdempotencyKey() without namespace = %q", got) - } -} - -func TestIdempotencyKey_MissingIdentity(t *testing.T) { - if _, err := IdempotencyKey(StoryInfo{}); err == nil { - t.Fatal("expected error for missing identity") - } -} diff --git a/engram/media_frame.go b/engram/media_frame.go deleted file mode 100644 index bcec249..0000000 --- a/engram/media_frame.go +++ /dev/null @@ -1,43 +0,0 @@ -package engram - -import "time" - -// AudioFrame represents PCM audio delivered through the streaming SDK. -type AudioFrame struct { - // PCM contains raw little-endian PCM audio bytes for the frame. - PCM []byte - // SampleRateHz is the sampling rate in hertz (for example, 16000). - SampleRateHz int32 - // Channels is the number of audio channels in PCM (for example, 1 for mono). - Channels int32 - // Codec optionally names the codec when the frame is encoded instead of raw PCM. - Codec string - // Timestamp is the media timeline position for this frame. - Timestamp time.Duration -} - -// VideoFrame represents encoded or raw video delivered through the streaming SDK. -type VideoFrame struct { - // Payload contains encoded bytes or raw pixel data for the frame. - Payload []byte - // Codec identifies the encoded video format (for example, "h264"), if applicable. - Codec string - // Width is the frame width in pixels. - Width uint32 - // Height is the frame height in pixels. - Height uint32 - // Timestamp is the media timeline position for this frame. - Timestamp time.Duration - // Raw reports whether Payload carries raw video pixels instead of encoded bytes. - Raw bool -} - -// BinaryFrame represents generic binary payloads exchanged over the streaming SDK. -type BinaryFrame struct { - // Payload carries opaque binary bytes. - Payload []byte - // MimeType identifies the payload media type (for example, "application/octet-stream"). - MimeType string - // Timestamp is an optional media timeline position for this frame. - Timestamp time.Duration -} diff --git a/engram/stream_kinds.go b/engram/stream_kinds.go deleted file mode 100644 index 63b15cb..0000000 --- a/engram/stream_kinds.go +++ /dev/null @@ -1,13 +0,0 @@ -package engram - -// StreamMessageKindData marks a StreamMessage as a normal application data packet. -const StreamMessageKindData = "data" - -// StreamMessageKindHeartbeat marks a StreamMessage as a transport liveness heartbeat. -const StreamMessageKindHeartbeat = "heartbeat" - -// StreamMessageKindNoop marks a StreamMessage as an intentionally empty no-op packet. -const StreamMessageKindNoop = "noop" - -// StreamMessageKindError marks a StreamMessage payload as a StructuredError envelope. -const StreamMessageKindError = "error" diff --git a/engram/transport.go b/engram/transport.go deleted file mode 100644 index 7783575..0000000 --- a/engram/transport.go +++ /dev/null @@ -1,93 +0,0 @@ -package engram - -import "reflect" - -// TransportDescriptor describes a named transport binding declared on the Story. -// Config carries arbitrary transport-specific settings (e.g. livekit/storage blocks). -type TransportDescriptor struct { - // Name is the transport binding name referenced by the Story. - Name string `json:"name"` - // Kind identifies the transport driver kind, such as "livekit" or "storage". - Kind string `json:"kind"` - // Mode selects the runtime behavior for the transport binding. - Mode string `json:"mode,omitempty"` - // Config carries arbitrary transport-specific settings. - Config map[string]any `json:"config,omitempty"` -} - -// Clone returns a deep copy of the descriptor to avoid callers mutating shared state. -func (t TransportDescriptor) Clone() TransportDescriptor { - clone := t - if t.Config != nil { - clone.Config = cloneConfigMap(t.Config) - } - return clone -} - -func cloneConfigMap(src map[string]any) map[string]any { - if src == nil { - return nil - } - out := make(map[string]any, len(src)) - for k, v := range src { - out[k] = cloneConfigValue(v) - } - return out -} - -func cloneConfigValue(value any) any { - if value == nil { - return nil - } - return cloneConfigReflectValue(reflect.ValueOf(value)).Interface() -} - -func cloneConfigReflectValue(value reflect.Value) reflect.Value { - if !value.IsValid() { - return value - } - switch value.Kind() { - case reflect.Interface: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - cloned := cloneConfigReflectValue(value.Elem()) - out := reflect.New(value.Type()).Elem() - out.Set(cloned) - return out - case reflect.Pointer: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - out := reflect.New(value.Type().Elem()) - out.Elem().Set(cloneConfigReflectValue(value.Elem())) - return out - case reflect.Map: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - out := reflect.MakeMapWithSize(value.Type(), value.Len()) - iter := value.MapRange() - for iter.Next() { - out.SetMapIndex(iter.Key(), cloneConfigReflectValue(iter.Value())) - } - return out - case reflect.Slice: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - out := reflect.MakeSlice(value.Type(), value.Len(), value.Len()) - for i := 0; i < value.Len(); i++ { - out.Index(i).Set(cloneConfigReflectValue(value.Index(i))) - } - return out - case reflect.Array: - out := reflect.New(value.Type()).Elem() - for i := 0; i < value.Len(); i++ { - out.Index(i).Set(cloneConfigReflectValue(value.Index(i))) - } - return out - default: - return value - } -} diff --git a/engram/transport_test.go b/engram/transport_test.go deleted file mode 100644 index e21b854..0000000 --- a/engram/transport_test.go +++ /dev/null @@ -1,57 +0,0 @@ -package engram - -import "testing" - -func TestTransportDescriptorCloneDeepCopiesTypedConfigCollections(t *testing.T) { - original := TransportDescriptor{ - Name: "rt", - Kind: "livekit", - Mode: "hot", - Config: map[string]any{ - "labels": map[string]string{ - "room": "alpha", - }, - "routes": []string{"primary", "backup"}, - "nested": []map[string]string{ - {"role": "writer"}, - }, - }, - } - - cloned := original.Clone() - - original.Config["labels"].(map[string]string)["room"] = "beta" - original.Config["routes"].([]string)[0] = "mutated" //nolint:goconst - original.Config["nested"].([]map[string]string)[0]["role"] = "reader" - - labels := cloned.Config["labels"].(map[string]string) - if labels["room"] != "alpha" { - t.Fatalf("expected typed map value to be cloned, got %q", labels["room"]) - } - routes := cloned.Config["routes"].([]string) - if routes[0] != "primary" { - t.Fatalf("expected typed slice value to be cloned, got %q", routes[0]) - } - nested := cloned.Config["nested"].([]map[string]string) - if nested[0]["role"] != "writer" { - t.Fatalf("expected nested typed collections to be cloned, got %q", nested[0]["role"]) - } -} - -func TestTransportDescriptorCloneKeepsOriginalTypeShapes(t *testing.T) { - original := TransportDescriptor{ - Config: map[string]any{ - "labels": map[string]string{"region": "us-east-1"}, - "routes": []string{"primary"}, - }, - } - - cloned := original.Clone() - - if _, ok := cloned.Config["labels"].(map[string]string); !ok { - t.Fatalf("expected typed map to preserve its concrete type, got %T", cloned.Config["labels"]) - } - if _, ok := cloned.Config["routes"].([]string); !ok { - t.Fatalf("expected typed slice to preserve its concrete type, got %T", cloned.Config["routes"]) - } -} diff --git a/env_resolver.go b/env_resolver.go deleted file mode 100644 index e8204c3..0000000 --- a/env_resolver.go +++ /dev/null @@ -1,39 +0,0 @@ -package sdk - -import ( - "os" - "strings" -) - -// envResolver merges binding-scoped overrides with process environment lookups. -type envResolver struct { - overrides map[string]string -} - -func newEnvResolver(overrides map[string]string) envResolver { - if len(overrides) == 0 { - return envResolver{} - } - clean := make(map[string]string, len(overrides)) - for key, value := range overrides { - name := strings.TrimSpace(key) - if name == "" { - continue - } - clean[name] = strings.TrimSpace(value) - } - return envResolver{overrides: clean} -} - -func (r envResolver) lookup(key string) string { - if r.overrides != nil { - if value, ok := r.overrides[key]; ok && strings.TrimSpace(value) != "" { - return value - } - } - return os.Getenv(key) -} - -func (r envResolver) Lookup(key string) string { - return r.lookup(key) -} diff --git a/errors.go b/errors.go deleted file mode 100644 index cf6f6ad..0000000 --- a/errors.go +++ /dev/null @@ -1,80 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "errors" - "fmt" - "time" -) - -// ErrBatchTimeout is a sentinel used with errors.Is to detect batch timeouts. -var ErrBatchTimeout = errors.New("bubu batch execution timed out") - -// ErrStoryRunNotFound indicates that a requested StoryRun could not be located. -var ErrStoryRunNotFound = errors.New("storyrun not found") - -// ErrImpulseSessionExists indicates that a dispatcher session key is already active. -var ErrImpulseSessionExists = errors.New("impulse session already active") - -// ErrImpulseSessionNotFound indicates that a dispatcher session key has no active session. -var ErrImpulseSessionNotFound = errors.New("impulse session not found") - -// BatchTimeoutError conveys that a batch engram exceeded its configured timeout. -type BatchTimeoutError struct { - // Timeout is the configured duration limit that was exceeded. - Timeout time.Duration - // Cause is the underlying timeout-related error, when one is available. - Cause error -} - -// Error implements the error interface. -func (e *BatchTimeoutError) Error() string { - if e == nil { - return ErrBatchTimeout.Error() - } - if e.Cause != nil { - return fmt.Sprintf("batch execution timed out after %s: %v", e.Timeout, e.Cause) - } - return fmt.Sprintf("batch execution timed out after %s", e.Timeout) -} - -// Unwrap exposes the underlying cause for errors.Unwrap / errors.Is checks. -func (e *BatchTimeoutError) Unwrap() error { - if e == nil { - return nil - } - return e.Cause -} - -// Is allows errors.Is(err, ErrBatchTimeout) to match *BatchTimeoutError values. -func (e *BatchTimeoutError) Is(target error) bool { - return target == ErrBatchTimeout -} - -// BatchExitCode returns the recommended container exit code for an error. -// Timeout errors map to 124 (GNU timeout), all other non-nil errors default to 1. -func BatchExitCode(err error) int { - switch { - case err == nil: - return 0 - case errors.Is(err, ErrBatchTimeout): - return 124 - default: - return 1 - } -} diff --git a/go.mod b/go.mod index 0705843..b77d302 100644 --- a/go.mod +++ b/go.mod @@ -1,58 +1,44 @@ module github.com/bubustack/bubu-sdk-go -go 1.26.2 +go 1.24.5 require ( - github.com/bubustack/bobrapet v0.1.6 - github.com/bubustack/core v0.1.3 - github.com/bubustack/tractatus v0.1.2 + github.com/aws/aws-sdk-go-v2 v1.39.3 + github.com/aws/aws-sdk-go-v2/config v1.31.13 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.19.13 + github.com/aws/aws-sdk-go-v2/service/s3 v1.88.5 + github.com/bubustack/bobrapet v0.1.2 + github.com/bubustack/bobravoz-grpc v0.1.0 github.com/google/uuid v1.6.0 github.com/mitchellh/mapstructure v1.5.0 github.com/stretchr/testify v1.11.1 - github.com/xeipuuv/gojsonschema v1.2.0 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 - go.opentelemetry.io/otel v1.43.0 - go.opentelemetry.io/otel/metric v1.43.0 - go.opentelemetry.io/otel/trace v1.43.0 - golang.org/x/sync v0.20.0 - golang.org/x/time v0.9.0 - google.golang.org/grpc v1.80.0 - google.golang.org/protobuf v1.36.11 - k8s.io/api v0.35.3 - k8s.io/apimachinery v0.35.3 - k8s.io/client-go v0.35.3 - k8s.io/utils v0.0.0-20260319190234-28399d86e0b5 - sigs.k8s.io/controller-runtime v0.23.3 + go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel/metric v1.38.0 + go.opentelemetry.io/otel/trace v1.38.0 + golang.org/x/sync v0.17.0 + google.golang.org/grpc v1.76.0 + google.golang.org/protobuf v1.36.10 + k8s.io/apimachinery v0.34.1 + k8s.io/client-go v0.34.1 + sigs.k8s.io/controller-runtime v0.22.3 ) require ( - dario.cat/mergo v1.0.1 // indirect - github.com/Masterminds/goutils v1.1.1 // indirect - github.com/Masterminds/semver/v3 v3.4.0 // indirect - github.com/Masterminds/sprig/v3 v3.3.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.41.5 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 // indirect - github.com/aws/aws-sdk-go-v2/config v1.32.14 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.19.14 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 // indirect - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.12 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.22 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.13 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.21 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.98.0 // indirect - github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 // indirect - github.com/aws/smithy-go v1.24.2 // indirect - github.com/beorn7/perks v1.0.1 // indirect - github.com/cenkalti/backoff/v5 v5.0.3 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.18.17 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.10 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.10 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.10 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.29.7 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.38.7 // indirect + github.com/aws/smithy-go v1.23.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect @@ -62,53 +48,40 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/swag v0.23.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect github.com/google/gnostic-models v0.7.0 // indirect - github.com/google/go-cmp v0.7.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect - github.com/huandu/xstrings v1.5.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect - github.com/mitchellh/copystructure v1.2.0 // indirect - github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.23.2 // indirect - github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.66.1 // indirect - github.com/prometheus/procfs v0.16.1 // indirect - github.com/shopspring/decimal v1.4.0 // indirect - github.com/spf13/cast v1.7.0 // indirect - github.com/spf13/pflag v1.0.9 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/x448/float16 v0.8.4 // indirect - github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect - github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect - go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 // indirect - go.opentelemetry.io/otel/sdk v1.43.0 // indirect - go.opentelemetry.io/proto/otlp v1.10.0 // indirect - go.yaml.in/yaml/v2 v2.4.3 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.38.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.49.0 // indirect - golang.org/x/net v0.52.0 // indirect - golang.org/x/oauth2 v0.35.0 // indirect - golang.org/x/sys v0.42.0 // indirect - golang.org/x/term v0.41.0 // indirect - golang.org/x/text v0.35.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect - gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect + golang.org/x/net v0.44.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/time v0.9.0 // indirect + golang.org/x/tools v0.37.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.35.3 // indirect + k8s.io/api v0.34.1 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect - sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index e761ad8..cd5fefa 100644 --- a/go.sum +++ b/go.sum @@ -1,61 +1,49 @@ -dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s= -dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= -github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= -github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= -github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= -github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= -github.com/aws/aws-sdk-go-v2 v1.41.5 h1:dj5kopbwUsVUVFgO4Fi5BIT3t4WyqIDjGKCangnV/yY= -github.com/aws/aws-sdk-go-v2 v1.41.5/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8 h1:eBMB84YGghSocM7PsjmmPffTa+1FBUeNvGvFou6V/4o= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.8/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI= -github.com/aws/aws-sdk-go-v2/config v1.32.14 h1:opVIRo/ZbbI8OIqSOKmpFaY7IwfFUOCCXBsUpJOwDdI= -github.com/aws/aws-sdk-go-v2/config v1.32.14/go.mod h1:U4/V0uKxh0Tl5sxmCBZ3AecYny4UNlVmObYjKuuaiOo= -github.com/aws/aws-sdk-go-v2/credentials v1.19.14 h1:n+UcGWAIZHkXzYt87uMFBv/l8THYELoX6gVcUvgl6fI= -github.com/aws/aws-sdk-go-v2/credentials v1.19.14/go.mod h1:cJKuyWB59Mqi0jM3nFYQRmnHVQIcgoxjEMAbLkpr62w= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21 h1:NUS3K4BTDArQqNu2ih7yeDLaS3bmHD0YndtA6UP884g= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.21/go.mod h1:YWNWJQNjKigKY1RHVJCuupeWDrrHjRqHm0N9rdrWzYI= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.12 h1:vhbHvVM9Til68SOR3Dds7zi51PaUlzexmh4Lf/uv+Ok= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.12/go.mod h1:jq4soyz7xX5bfkxVKQu1BwkopF2QbQUTs5n7iIg3D8Q= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21 h1:Rgg6wvjjtX8bNHcvi9OnXWwcE0a2vGpbwmtICOsvcf4= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.21/go.mod h1:A/kJFst/nm//cyqonihbdpQZwiUhhzpqTsdbhDdRF9c= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21 h1:PEgGVtPoB6NTpPrBgqSE5hE/o47Ij9qk/SEZFbUOe9A= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.21/go.mod h1:p+hz+PRAYlY3zcpJhPwXlLC4C+kqn70WIHwnzAfs6ps= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6 h1:qYQ4pzQ2Oz6WpQ8T3HvGHnZydA72MnLuFK9tJwmrbHw= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.6/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.22 h1:rWyie/PxDRIdhNf4DzRk0lvjVOqFJuNnO8WwaIRVxzQ= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.22/go.mod h1:zd/JsJ4P7oGfUhXn1VyLqaRZwPmZwg44Jf2dS84Dm3Y= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7 h1:5EniKhLZe4xzL7a+fU3C2tfUN4nWIqlLesfrjkuPFTY= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.7/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.13 h1:JRaIgADQS/U6uXDqlPiefP32yXTda7Kqfx+LgspooZM= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.13/go.mod h1:CEuVn5WqOMilYl+tbccq8+N2ieCy0gVn3OtRb0vBNNM= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21 h1:c31//R3xgIJMSC8S6hEVq+38DcvUlgFY0FM6mSI5oto= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.21/go.mod h1:r6+pf23ouCB718FUxaqzZdbpYFyDtehyZcmP5KL9FkA= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.21 h1:ZlvrNcHSFFWURB8avufQq9gFsheUgjVD9536obIknfM= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.21/go.mod h1:cv3TNhVrssKR0O/xxLJVRfd2oazSnZnkUeTf6ctUwfQ= -github.com/aws/aws-sdk-go-v2/service/s3 v1.98.0 h1:foqo/ocQ7WqKwy3FojGtZQJo0FR4vto9qnz9VaumbCo= -github.com/aws/aws-sdk-go-v2/service/s3 v1.98.0/go.mod h1:uoA43SdFwacedBfSgfFSjjCvYe8aYBS7EnU5GZ/YKMM= -github.com/aws/aws-sdk-go-v2/service/signin v1.0.9 h1:QKZH0S178gCmFEgst8hN0mCX1KxLgHBKKY/CLqwP8lg= -github.com/aws/aws-sdk-go-v2/service/signin v1.0.9/go.mod h1:7yuQJoT+OoH8aqIxw9vwF+8KpvLZ8AWmvmUWHsGQZvI= -github.com/aws/aws-sdk-go-v2/service/sso v1.30.15 h1:lFd1+ZSEYJZYvv9d6kXzhkZu07si3f+GQ1AaYwa2LUM= -github.com/aws/aws-sdk-go-v2/service/sso v1.30.15/go.mod h1:WSvS1NLr7JaPunCXqpJnWk1Bjo7IxzZXrZi1QQCkuqM= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19 h1:dzztQ1YmfPrxdrOiuZRMF6fuOwWlWpD2StNLTceKpys= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.19/go.mod h1:YO8TrYtFdl5w/4vmjL8zaBSsiNp3w0L1FfKVKenZT7w= -github.com/aws/aws-sdk-go-v2/service/sts v1.41.10 h1:p8ogvvLugcR/zLBXTXrTkj0RYBUdErbMnAFFp12Lm/U= -github.com/aws/aws-sdk-go-v2/service/sts v1.41.10/go.mod h1:60dv0eZJfeVXfbT1tFJinbHrDfSJ2GZl4Q//OSSNAVw= -github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng= -github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= +github.com/aws/aws-sdk-go-v2 v1.39.3 h1:h7xSsanJ4EQJXG5iuW4UqgP7qBopLpj84mpkNx3wPjM= +github.com/aws/aws-sdk-go-v2 v1.39.3/go.mod h1:yWSxrnioGUZ4WVv9TgMrNUeLV3PFESn/v+6T/Su8gnM= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 h1:t9yYsydLYNBk9cJ73rgPhPWqOh/52fcWDQB5b1JsKSY= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2/go.mod h1:IusfVNTmiSN3t4rhxWFaBAqn+mcNdwKtPcV16eYdgko= +github.com/aws/aws-sdk-go-v2/config v1.31.13 h1:wcqQB3B0PgRPUF5ZE/QL1JVOyB0mbPevHFoAMpemR9k= +github.com/aws/aws-sdk-go-v2/config v1.31.13/go.mod h1:ySB5D5ybwqGbT6c3GszZ+u+3KvrlYCUQNo62+hkKOFk= +github.com/aws/aws-sdk-go-v2/credentials v1.18.17 h1:skpEwzN/+H8cdrrtT8y+rvWJGiWWv0DeNAe+4VTf+Vs= +github.com/aws/aws-sdk-go-v2/credentials v1.18.17/go.mod h1:Ed+nXsaYa5uBINovJhcAWkALvXw2ZLk36opcuiSZfJM= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 h1:UuGVOX48oP4vgQ36oiKmW9RuSeT8jlgQgBFQD+HUiHY= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10/go.mod h1:vM/Ini41PzvudT4YkQyE/+WiQJiQ6jzeDyU8pQKwCac= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.19.13 h1:9XV2TkOvCs6Fis10b4scQbv/eDPhklhU/65GikPxXAA= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.19.13/go.mod h1:X5gq64GsjuOIJRIUzR3x3Du96zUF+U1if3Qw/qNx1k8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.10 h1:mj/bdWleWEh81DtpdHKkw41IrS+r3uw1J/VQtbwYYp8= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.10/go.mod h1:7+oEMxAZWP8gZCyjcm9VicI0M61Sx4DJtcGfKYv2yKQ= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.10 h1:wh+/mn57yhUrFtLIxyFPh2RgxgQz/u+Yrf7hiHGHqKY= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.10/go.mod h1:7zirD+ryp5gitJJ2m1BBux56ai8RIRDykXZrJSp540w= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.10 h1:FHw90xCTsofzk6vjU808TSuDtDfOOKPNdz5Weyc3tUI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.10/go.mod h1:n8jdIE/8F3UYkg8O4IGkQpn2qUmapg/1K1yl29/uf/c= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 h1:xtuxji5CS0JknaXoACOunXOYOQzgfTvGAc9s2QdCJA4= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2/go.mod h1:zxwi0DIR0rcRcgdbl7E2MSOvxDyyXGBlScvBkARFaLQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.1 h1:ne+eepnDB2Wh5lHKzELgEncIqeVlQ1rSF9fEa4r5I+A= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.1/go.mod h1:u0Jkg0L+dcG1ozUq21uFElmpbmjBnhHR5DELHIme4wg= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10 h1:DRND0dkCKtJzCj4Xl4OpVbXZgfttY5q712H9Zj7qc/0= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10/go.mod h1:tGGNmJKOTernmR2+VJ0fCzQRurcPZj9ut60Zu5Fi6us= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.10 h1:DA+Hl5adieRyFvE7pCvBWm3VOZTRexGVkXw33SUqNoY= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.10/go.mod h1:L+A89dH3/gr8L4ecrdzuXUYd1znoko6myzndVGZx/DA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.88.5 h1:FlGScxzCGNzT+2AvHT1ZGMvxTwAMa6gsooFb1pO/AiM= +github.com/aws/aws-sdk-go-v2/service/s3 v1.88.5/go.mod h1:N/iojY+8bW3MYol9NUMuKimpSbPEur75cuI1SmtonFM= +github.com/aws/aws-sdk-go-v2/service/sso v1.29.7 h1:fspVFg6qMx0svs40YgRmE7LZXh9VRZvTT35PfdQR6FM= +github.com/aws/aws-sdk-go-v2/service/sso v1.29.7/go.mod h1:BQTKL3uMECaLaUV3Zc2L4Qybv8C6BIXjuu1dOPyxTQs= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2 h1:scVnW+NLXasGOhy7HhkdT9AGb6kjgW7fJ5xYkUaqHs0= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2/go.mod h1:FRNCY3zTEWZXBKm2h5UBUPvCVDOecTad9KhynDyGBc0= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.7 h1:VEO5dqFkMsl8QZ2yHsFDJAIZLAkEbaYDB+xdKi0Feic= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.7/go.mod h1:L1xxV3zAdB+qVrVW/pBIrIAnHFWHo6FBbFe4xOGsG/o= +github.com/aws/smithy-go v1.23.1 h1:sLvcH6dfAFwGkHLZ7dGiYF7aK6mg4CgKA/iDKjLDt9M= +github.com/aws/smithy-go v1.23.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bubustack/bobrapet v0.1.6 h1:kf7A1GsQvpIBgWE8FIoYMke/nMpXgBAzw6+G2tB8HYM= -github.com/bubustack/bobrapet v0.1.6/go.mod h1:2eZ3mnhnvdO5Y1vkCrEDUfg0V9Menv76pdNIlfSEL/M= -github.com/bubustack/core v0.1.3 h1:rFyj8EyC0agZZOOw9nGcirdNGqL5ArJUfEFPAAtdpb4= -github.com/bubustack/core v0.1.3/go.mod h1:UlEBsFdlyVdGVZVb9yfBoVM33DyxYQv3n921G1ll7Ng= -github.com/bubustack/tractatus v0.1.2 h1:PtcEisKiWGelflXB4NGtSl1O9G6BUXZ8xKNx4m2hMRQ= -github.com/bubustack/tractatus v0.1.2/go.mod h1:ku8Grbskvqov4CRHasQNJVsf1Ie/FZz9ak3Yap+vX8I= -github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= -github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/bubustack/bobrapet v0.1.2 h1:53rtFcNWPy/aQABwWZ7lOm+1EkY3re3a9usmFUpIzXc= +github.com/bubustack/bobrapet v0.1.2/go.mod h1:FVPQn48l8q5tZTK1kDfsbv6ew9lS6thkky7a2+Z5GYI= +github.com/bubustack/bobravoz-grpc v0.1.0 h1:mEXWh3jzJnkdE5+6jzylkXOmy2X/ZEn13oY30RpdTqk= +github.com/bubustack/bobravoz-grpc v0.1.0/go.mod h1:GlqTtlu5auq29bMcE14OJFwGoAnNdUAnpDHDOHi0nBs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -66,10 +54,6 @@ github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= -github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= -github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= -github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -89,6 +73,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= @@ -96,18 +82,16 @@ github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7O github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83 h1:z2ogiKUYzX5Is6zr/vP9vJGqPwcdqsWjOt+V8J7+bTc= -github.com/google/pprof v0.0.0-20260115054156-294ebfa9ad83/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= -github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= -github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -117,12 +101,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= -github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= -github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= -github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -131,10 +111,12 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFd github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.28.1 h1:S4hj+HbZp40fNKuLUQOYLDgZLwNUVn19N3Atb98NCyI= -github.com/onsi/ginkgo/v2 v2.28.1/go.mod h1:CLtbVInNckU3/+gC8LzkGUb9oF+e8W8TdUsxPwvdOgE= -github.com/onsi/gomega v1.39.1 h1:1IJLAad4zjPn2PsnhH70V4DKRFlrCzGBNrNaru+Vf28= -github.com/onsi/gomega v1.39.1/go.mod h1:hL6yVALoTOxeWudERyfppUcZXjMwIMLnuSfruD2lcfg= +github.com/onsi/ginkgo/v2 v2.26.0 h1:1J4Wut1IlYZNEAWIV3ALrT9NfiaGW2cDCJQSFQMs/gE= +github.com/onsi/ginkgo/v2 v2.26.0/go.mod h1:qhEywmzWTBUY88kfO0BRvX4py7scov9yR+Az2oavUzw= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= @@ -145,14 +127,10 @@ github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9Z github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= -github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= -github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= -github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= -github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= -github.com/spf13/cast v1.7.0 h1:ntdiHjuueXFgm5nzDRdOS4yfT43P5Fnud6DH50rz/7w= -github.com/spf13/cast v1.7.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= -github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= -github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -166,105 +144,112 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= -github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17UxZ74= -github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= -go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= -go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 h1:YH4g8lQroajqUwWbq/tr2QX1JFmEXaDLgG+ew9bLMWo= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0/go.mod h1:fvPi2qXDqFs8M4B4fmJhE92TyQs9Ydjlg3RvfUp+NbQ= -go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= -go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= -go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= -go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= -go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= -go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= -go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= -go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= -go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= -go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= -go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= -go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= -go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= -go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= -go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= -golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= -golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= -golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= -golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= -golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= -golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= -golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= -golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= -golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= -golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= -golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= -golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= -golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= +golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= -golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= -gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= -gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= -gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= -google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= -google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= -google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= -google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= -google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= -google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= +golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= +google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A= +google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= -gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.35.3 h1:pA2fiBc6+N9PDf7SAiluKGEBuScsTzd2uYBkA5RzNWQ= -k8s.io/api v0.35.3/go.mod h1:9Y9tkBcFwKNq2sxwZTQh1Njh9qHl81D0As56tu42GA4= -k8s.io/apiextensions-apiserver v0.35.3 h1:2fQUhEO7P17sijylbdwt0nBdXP0TvHrHj0KeqHD8FiU= -k8s.io/apiextensions-apiserver v0.35.3/go.mod h1:tK4Kz58ykRpwAEkXUb634HD1ZAegEElktz/B3jgETd8= -k8s.io/apimachinery v0.35.3 h1:MeaUwQCV3tjKP4bcwWGgZ/cp/vpsRnQzqO6J6tJyoF8= -k8s.io/apimachinery v0.35.3/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= -k8s.io/client-go v0.35.3 h1:s1lZbpN4uI6IxeTM2cpdtrwHcSOBML1ODNTCCfsP1pg= -k8s.io/client-go v0.35.3/go.mod h1:RzoXkc0mzpWIDvBrRnD+VlfXP+lRzqQjCmKtiwZ8Q9c= +k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM= +k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk= +k8s.io/apiextensions-apiserver v0.34.1 h1:NNPBva8FNAPt1iSVwIE0FsdrVriRXMsaWFMqJbII2CI= +k8s.io/apiextensions-apiserver v0.34.1/go.mod h1:hP9Rld3zF5Ay2Of3BeEpLAToP+l4s5UlxiHfqRaRcMc= +k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4= +k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY= +k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= -k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= -k8s.io/utils v0.0.0-20260319190234-28399d86e0b5 h1:kBawHLSnx/mYHmRnNUf9d4CpjREbeZuxoSGOX/J+aYM= -k8s.io/utils v0.0.0-20260319190234-28399d86e0b5/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= -sigs.k8s.io/controller-runtime v0.23.3 h1:VjB/vhoPoA9l1kEKZHBMnQF33tdCLQKJtydy4iqwZ80= -sigs.k8s.io/controller-runtime v0.23.3/go.mod h1:B6COOxKptp+YaUT5q4l6LqUJTRpizbgf9KSRNdQGns0= -sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= -sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA= +k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.22.3 h1:I7mfqz/a/WdmDCEnXmSPm8/b/yRTy6JsKKENTijTq8Y= +sigs.k8s.io/controller-runtime v0.22.3/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 h1:2WOzJpHUBVrrkDjU4KBT8n5LDcj824eX0I5UKcgeRUs= -sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/impulse.go b/impulse.go index e05f48f..88e2f32 100644 --- a/impulse.go +++ b/impulse.go @@ -1,26 +1,11 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package sdk import ( "context" + "encoding/json" "fmt" + "os" - "github.com/bubustack/bobrapet/pkg/storage" "github.com/bubustack/bubu-sdk-go/engram" "github.com/bubustack/bubu-sdk-go/k8s" "github.com/bubustack/bubu-sdk-go/runtime" @@ -30,11 +15,12 @@ import ( // // This function infers config type C from the impulse implementation, providing compile-time // type safety. It orchestrates the complete lifecycle: -// 1. Load execution context from environment (BUBU_STEP_CONFIG, BUBU_TRIGGER_DATA, etc.) -// 2. Unmarshal config into type C -// 3. Call impulse.Init with typed config and secrets -// 4. Create pre-configured Kubernetes client with namespace resolution -// 5. Call impulse.Run with client, transferring control to long-running process +// 1. Load execution context from environment (BUBU_CONFIG, BUBU_IMPULSE_WITH, etc.) +// 2. Merge BUBU_IMPULSE_WITH JSON into config if provided (for operator injection) +// 3. Unmarshal config into type C +// 4. Call impulse.Init with typed config and secrets +// 5. Create pre-configured Kubernetes client with namespace resolution +// 6. Call impulse.Run with client, transferring control to long-running process // // The impulse's Run method should block until work completes or context is canceled. // Typical use cases: webhook listeners, message queue consumers, schedulers, event watchers. @@ -68,38 +54,32 @@ import ( // } // } func RunImpulse[C any](ctx context.Context, i engram.Impulse[C]) error { - ctx, _ = withDefaultLogger(ctx) - defer publishCapturedLogs(ctx) - logger := LoggerFromContext(ctx) - logger.Info("Initializing Bubu SDK for Impulse execution") + LoggerFromContext(ctx).Info("Initializing Bubu SDK for Impulse execution") execCtxData, err := runtime.LoadExecutionContextData() if err != nil { return fmt.Errorf("failed to load execution context: %w", err) } - logExecutionContextDebug(logger, execCtxData) - sm, err := storage.SharedManager(ctx) - if err != nil { - return fmt.Errorf("failed to create storage manager: %w", err) - } - configMap, err := hydrateConfig(ctx, sm, execCtxData.Config, execCtxData.CELContext) - if err != nil { - return fmt.Errorf("failed to hydrate config: %w", err) + // If provided, merge BUBU_IMPULSE_WITH JSON into config before unmarshaling. + if withStr := os.Getenv("BUBU_IMPULSE_WITH"); withStr != "" { + var withMap map[string]any + if err := json.Unmarshal([]byte(withStr), &withMap); err != nil { + return fmt.Errorf("failed to unmarshal BUBU_IMPULSE_WITH: %w", err) + } + for k, v := range withMap { + execCtxData.Config[k] = v + } } + // Unmarshal config. - config, err := runtime.UnmarshalFromMap[C](configMap) + config, err := runtime.UnmarshalFromMap[C](execCtxData.Config) if err != nil { return fmt.Errorf("failed to unmarshal config: %w", err) } - secrets, err := engram.NewSecretsWithError(ctx, execCtxData.Secrets) - if err != nil { - return fmt.Errorf("failed to expand secrets: %w", err) - } + secrets := engram.NewSecrets(execCtxData.Secrets) - if err := callWithPanicRecoveryNoValue("impulse Init", func() error { - return i.Init(ctx, config, secrets) - }); err != nil { + if err := i.Init(ctx, config, secrets); err != nil { return fmt.Errorf("impulse initialization failed: %w", err) } @@ -108,11 +88,6 @@ func RunImpulse[C any](ctx context.Context, i engram.Impulse[C]) error { return fmt.Errorf("failed to create k8s client: %w", err) } - logger.Info("Starting Impulse") - if err := callWithPanicRecoveryNoValue("impulse Run", func() error { - return i.Run(ctx, k8sClient) - }); err != nil { - return fmt.Errorf("impulse execution failed: %w", err) - } - return nil + LoggerFromContext(ctx).Info("Starting Impulse") + return i.Run(ctx, k8sClient) } diff --git a/integration/envtest/env.go b/integration/envtest/env.go deleted file mode 100644 index 126dfac..0000000 --- a/integration/envtest/env.go +++ /dev/null @@ -1,151 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package envtest - -import ( - "os" - "path/filepath" - "testing" - "time" - - bubuk8s "github.com/bubustack/bubu-sdk-go/k8s" - "github.com/bubustack/core/contracts" - "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/runtime" - clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - ctrlenvtest "sigs.k8s.io/controller-runtime/pkg/envtest" -) - -type sdkEnvtestHarness struct { //nolint:unused - apiClient client.Client - sdkClient *bubuk8s.Client - scheme *runtime.Scheme - namespace string -} - -const ( - triggerResolverPollIntervalEnv = "BUBU_ENVTEST_TRIGGER_RESOLVER_POLL_INTERVAL" //nolint:unused - triggerResolverRequestTimeoutEnv = "BUBU_ENVTEST_TRIGGER_RESOLVER_REQUEST_TIMEOUT" //nolint:unused - triggerResolverStopTimeoutEnv = "BUBU_ENVTEST_TRIGGER_RESOLVER_STOP_TIMEOUT" //nolint:unused -) - -func triggerResolverPollInterval() time.Duration { //nolint:unused - return parsePositiveDurationEnv(triggerResolverPollIntervalEnv, 20*time.Millisecond) -} - -func triggerResolverRequestTimeout() time.Duration { //nolint:unused - return parsePositiveDurationEnv(triggerResolverRequestTimeoutEnv, 500*time.Millisecond) -} - -func triggerResolverStopTimeout() time.Duration { //nolint:unused - return parsePositiveDurationEnv(triggerResolverStopTimeoutEnv, 2*time.Second) -} - -func parsePositiveDurationEnv(key string, fallback time.Duration) time.Duration { - raw := os.Getenv(key) - if raw == "" { - return fallback - } - parsed, err := time.ParseDuration(raw) - if err != nil || parsed <= 0 { - return fallback - } - return parsed -} - -func waitForSignal(done <-chan struct{}, timeout time.Duration) bool { - if timeout <= 0 { - timeout = time.Millisecond - } - timer := time.NewTimer(timeout) - defer timer.Stop() - - select { - case <-done: - return true - case <-timer.C: - return false - } -} - -func resolveCRDPath(t *testing.T) string { //nolint:unused - t.Helper() - - if override := os.Getenv("BOBRAPET_CRD_PATH"); override != "" { - if info, err := os.Stat(override); err == nil && info.IsDir() { - return override - } - t.Fatalf("BOBRAPET_CRD_PATH=%q does not exist or is not a directory", override) - } - - candidates := []string{ - filepath.Join("..", "..", "bobrapet", "config", "crd", "bases"), - filepath.Join("..", "..", "..", "bobrapet", "config", "crd", "bases"), - } - for _, candidate := range candidates { - if info, err := os.Stat(candidate); err == nil && info.IsDir() { - return candidate - } - } - - t.Skip("bobrapet CRDs not found; set BOBRAPET_CRD_PATH or run tests within the bobrapet+bubu-sdk-go workspace") - return "" -} - -func setupSDKEnvtest(t *testing.T, addToSchemes ...func(*runtime.Scheme) error) *sdkEnvtestHarness { //nolint:unused - t.Helper() - - if testing.Short() { - t.Skip("skipping integration envtest in short mode") - } - if os.Getenv("KUBEBUILDER_ASSETS") == "" { - t.Skip("KUBEBUILDER_ASSETS not set; skipping envtest smoke test") - } - - testEnv := &ctrlenvtest.Environment{ - CRDDirectoryPaths: []string{resolveCRDPath(t)}, - } - - cfg, err := testEnv.Start() - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, testEnv.Stop()) - }) - - scheme := runtime.NewScheme() - require.NoError(t, clientgoscheme.AddToScheme(scheme)) - for _, addToScheme := range addToSchemes { - require.NoError(t, addToScheme(scheme)) - } - - apiClient, err := client.New(cfg, client.Options{Scheme: scheme}) - require.NoError(t, err) - - const namespace = "default" - t.Setenv(contracts.TargetStoryNamespaceEnv, namespace) - - sdkClient, err := bubuk8s.NewClientForConfig(cfg) - require.NoError(t, err) - - return &sdkEnvtestHarness{ - apiClient: apiClient, - sdkClient: sdkClient, - scheme: scheme, - namespace: namespace, - } -} diff --git a/integration/envtest/impulse_stats_smoke_test.go b/integration/envtest/impulse_stats_smoke_test.go deleted file mode 100644 index 871191c..0000000 --- a/integration/envtest/impulse_stats_smoke_test.go +++ /dev/null @@ -1,124 +0,0 @@ -//go:build integration - -package envtest - -import ( - "context" - "testing" - "time" - - bubuv1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" - "github.com/bubustack/bobrapet/pkg/refs" - bubuk8s "github.com/bubustack/bubu-sdk-go/k8s" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func newImpulseForStatsTest(name, namespace string) *bubuv1alpha1.Impulse { - return &bubuv1alpha1.Impulse{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - Spec: bubuv1alpha1.ImpulseSpec{ - TemplateRef: refs.ImpulseTemplateReference{ - Name: "impulse-template", - }, - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: "story-for-stats"}, - }, - }, - } -} - -func TestUpdateImpulseTriggerStats_AppliesDeltaLive(t *testing.T) { - h := setupSDKEnvtest(t, bubuv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - impulse := newImpulseForStatsTest("impulse-stats-live", namespace) - require.NoError(t, apiClient.Create(context.Background(), impulse)) - - lastTrigger := time.Date(2026, time.March, 27, 10, 0, 0, 0, time.FixedZone("UTC+4", 4*60*60)) - lastSuccess := lastTrigger.Add(30 * time.Second) - lastThrottled := lastTrigger.Add(45 * time.Second) - - require.NoError(t, sdkClient.UpdateImpulseTriggerStats(context.Background(), impulse.Name, namespace, bubuk8s.TriggerStatsDelta{ - TriggersReceived: 3, - StoriesLaunched: 2, - FailedTriggers: 1, - ThrottledTriggers: 1, - LastTrigger: lastTrigger, - LastSuccess: &lastSuccess, - LastThrottled: &lastThrottled, - })) - - updated := &bubuv1alpha1.Impulse{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: impulse.Name, Namespace: namespace}, - updated, - )) - - require.Equal(t, int64(3), updated.Status.TriggersReceived) - require.Equal(t, int64(2), updated.Status.StoriesLaunched) - require.Equal(t, int64(1), updated.Status.FailedTriggers) - require.Equal(t, int64(1), updated.Status.ThrottledTriggers) - require.NotNil(t, updated.Status.LastTrigger) - require.NotNil(t, updated.Status.LastSuccess) - require.NotNil(t, updated.Status.LastThrottled) - require.Equal(t, updated.Generation, updated.Status.ObservedGeneration) - require.True(t, updated.Status.LastTrigger.Time.Equal(lastTrigger.UTC())) - require.True(t, updated.Status.LastSuccess.Time.Equal(lastSuccess.UTC())) - require.True(t, updated.Status.LastThrottled.Time.Equal(lastThrottled.UTC())) -} - -func TestUpdateImpulseTriggerStats_PreservesUnsetTimestampsLive(t *testing.T) { - h := setupSDKEnvtest(t, bubuv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - impulse := newImpulseForStatsTest("impulse-stats-preserve-live", namespace) - require.NoError(t, apiClient.Create(context.Background(), impulse)) - - baseTrigger := metav1.NewTime(time.Date(2026, time.March, 27, 9, 0, 0, 0, time.UTC)) - baseSuccess := metav1.NewTime(time.Date(2026, time.March, 27, 9, 1, 0, 0, time.UTC)) - baseThrottled := metav1.NewTime(time.Date(2026, time.March, 27, 9, 2, 0, 0, time.UTC)) - - statusObj := impulse.DeepCopy() - statusObj.Status = bubuv1alpha1.ImpulseStatus{ - TriggersReceived: 10, - StoriesLaunched: 6, - FailedTriggers: 2, - ThrottledTriggers: 4, - LastTrigger: &baseTrigger, - LastSuccess: &baseSuccess, - LastThrottled: &baseThrottled, - } - require.NoError(t, apiClient.Status().Update(context.Background(), statusObj)) - - nextTrigger := time.Date(2026, time.March, 27, 10, 0, 0, 0, time.UTC) - require.NoError(t, sdkClient.UpdateImpulseTriggerStats(context.Background(), impulse.Name, namespace, bubuk8s.TriggerStatsDelta{ - TriggersReceived: 1, - LastTrigger: nextTrigger, - })) - - updated := &bubuv1alpha1.Impulse{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: impulse.Name, Namespace: namespace}, - updated, - )) - - require.Equal(t, int64(11), updated.Status.TriggersReceived) - require.Equal(t, int64(6), updated.Status.StoriesLaunched) - require.Equal(t, int64(2), updated.Status.FailedTriggers) - require.Equal(t, int64(4), updated.Status.ThrottledTriggers) - require.NotNil(t, updated.Status.LastTrigger) - require.NotNil(t, updated.Status.LastSuccess) - require.NotNil(t, updated.Status.LastThrottled) - require.True(t, updated.Status.LastTrigger.Time.Equal(nextTrigger.UTC())) - require.True(t, updated.Status.LastSuccess.Time.Equal(baseSuccess.Time)) - require.True(t, updated.Status.LastThrottled.Time.Equal(baseThrottled.Time)) -} diff --git a/integration/envtest/steprun_status_smoke_test.go b/integration/envtest/steprun_status_smoke_test.go deleted file mode 100644 index a2148f6..0000000 --- a/integration/envtest/steprun_status_smoke_test.go +++ /dev/null @@ -1,152 +0,0 @@ -//go:build integration - -package envtest - -import ( - "context" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/refs" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func TestPatchStepRunStatus_DedupesEffectsByKeyWhenSeqZero(t *testing.T) { - h := setupSDKEnvtest(t, runsv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "effect-dedupe-live", - Namespace: namespace, - }, - Spec: runsv1alpha1.StepRunSpec{ - StoryRunRef: refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: "storyrun-effect-dedupe"}, - }, - StepID: "step-1", - }, - } - require.NoError(t, apiClient.Create(context.Background(), stepRun)) - - now := metav1.NewTime(time.Now().UTC()) - firstPatch := runsv1alpha1.StepRunStatus{ - Effects: []runsv1alpha1.EffectRecord{{ - Seq: 0, - Key: "effect-1", - Status: "succeeded", - EmittedAt: &now, - Details: &runtime.RawExtension{Raw: []byte(`{"source":"first"}`)}, - }}, - } - require.NoError(t, sdkClient.PatchStepRunStatus(context.Background(), stepRun.Name, firstPatch)) - - secondPatch := runsv1alpha1.StepRunStatus{ - Effects: []runsv1alpha1.EffectRecord{{ - Seq: 0, - Key: "effect-1", - Status: "succeeded", - EmittedAt: &now, - Details: &runtime.RawExtension{Raw: []byte(`{"source":"second"}`)}, - }}, - } - require.NoError(t, sdkClient.PatchStepRunStatus(context.Background(), stepRun.Name, secondPatch)) - - updated := &runsv1alpha1.StepRun{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: stepRun.Name, Namespace: namespace}, - updated, - )) - require.Len(t, updated.Status.Effects, 1) - require.Equal(t, "effect-1", updated.Status.Effects[0].Key) - require.Equal(t, "succeeded", updated.Status.Effects[0].Status) - require.NotNil(t, updated.Status.Effects[0].Details) - require.JSONEq(t, `{"source":"first"}`, string(updated.Status.Effects[0].Details.Raw)) -} - -func TestPatchStepRunStatus_PreservesNeedsWhenIncomingOmitsThemLive(t *testing.T) { - h := setupSDKEnvtest(t, runsv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "step-needs-preserve-live", - Namespace: namespace, - }, - Spec: runsv1alpha1.StepRunSpec{ - StoryRunRef: refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: "storyrun-needs-preserve"}, - }, - StepID: "step-1", - }, - } - require.NoError(t, apiClient.Create(context.Background(), stepRun)) - - withStatus := stepRun.DeepCopy() - withStatus.Status.Phase = enums.PhasePending - withStatus.Status.Needs = []string{"prior-step"} - require.NoError(t, apiClient.Status().Update(context.Background(), withStatus)) - - require.NoError(t, sdkClient.PatchStepRunStatus(context.Background(), stepRun.Name, runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseRunning, - })) - - updated := &runsv1alpha1.StepRun{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: stepRun.Name, Namespace: namespace}, - updated, - )) - require.Equal(t, enums.PhaseRunning, updated.Status.Phase) - require.Equal(t, []string{"prior-step"}, updated.Status.Needs) -} - -func TestPatchStepRunStatus_ExplicitEmptyNeedsClearsLive(t *testing.T) { - h := setupSDKEnvtest(t, runsv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "step-needs-clear-live", - Namespace: namespace, - }, - Spec: runsv1alpha1.StepRunSpec{ - StoryRunRef: refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: "storyrun-needs-clear"}, - }, - StepID: "step-1", - }, - } - require.NoError(t, apiClient.Create(context.Background(), stepRun)) - - withStatus := stepRun.DeepCopy() - withStatus.Status.Phase = enums.PhasePending - withStatus.Status.Needs = []string{"prior-step"} - require.NoError(t, apiClient.Status().Update(context.Background(), withStatus)) - - require.NoError(t, sdkClient.PatchStepRunStatus(context.Background(), stepRun.Name, runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseRunning, - Needs: []string{}, - })) - - updated := &runsv1alpha1.StepRun{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: stepRun.Name, Namespace: namespace}, - updated, - )) - require.Equal(t, enums.PhaseRunning, updated.Status.Phase) - require.Empty(t, updated.Status.Needs) -} diff --git a/integration/envtest/story_smoke_test.go b/integration/envtest/story_smoke_test.go deleted file mode 100644 index f614c2c..0000000 --- a/integration/envtest/story_smoke_test.go +++ /dev/null @@ -1,468 +0,0 @@ -//go:build integration - -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ -package envtest - -import ( - "context" - "encoding/json" - "fmt" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - bubuv1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/refs" - runsidentity "github.com/bubustack/bobrapet/pkg/runs/identity" - "github.com/bubustack/bubu-sdk-go/k8s" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" - "github.com/bubustack/core/contracts" - - "github.com/stretchr/testify/require" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type storyEnvtestHarness struct { - apiClient client.Client - sdkClient *k8s.Client - namespace string -} - -func setupStoryEnvtest(t *testing.T) *storyEnvtestHarness { - t.Helper() - - base := setupSDKEnvtest(t, runsv1alpha1.AddToScheme, bubuv1alpha1.AddToScheme) - - return &storyEnvtestHarness{ - apiClient: base.apiClient, - sdkClient: base.sdkClient, - namespace: base.namespace, - } -} - -func createTestStory(t *testing.T, apiClient client.Client, namespace, name string) *bubuv1alpha1.Story { - t.Helper() - - story := &bubuv1alpha1.Story{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - Spec: bubuv1alpha1.StorySpec{ - Steps: []bubuv1alpha1.Step{{ - Name: "start", - Type: enums.StepTypeCondition, - }}, - }, - } - require.NoError(t, apiClient.Create(context.Background(), story)) - return story -} - -func startStoryTriggerResolver(t *testing.T, apiClient client.Client, namespace string) { - t.Helper() - - ctx, cancel := context.WithCancel(context.Background()) - done := make(chan struct{}) - pollInterval := triggerResolverPollInterval() - requestTimeout := triggerResolverRequestTimeout() - stopTimeout := triggerResolverStopTimeout() - - go func() { - defer close(done) - ticker := time.NewTicker(pollInterval) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - resolveCtx, resolveCancel := context.WithTimeout(ctx, requestTimeout) - resolvePendingStoryTriggers(resolveCtx, apiClient, namespace) - resolveCancel() - } - } - }() - - t.Cleanup(func() { - cancel() - if !waitForSignal(done, stopTimeout) { - t.Fatalf("story trigger resolver did not stop within %s", stopTimeout) - } - }) -} - -func resolvePendingStoryTriggers(ctx context.Context, apiClient client.Client, namespace string) { - list := &runsv1alpha1.StoryTriggerList{} - if err := apiClient.List(ctx, list, client.InNamespace(namespace)); err != nil { - return - } - for i := range list.Items { - trigger := list.Items[i].DeepCopy() - if trigger.Status.Decision != "" { - continue - } - _ = resolveStoryTrigger(ctx, apiClient, trigger) - } -} - -func resolveStoryTrigger(ctx context.Context, apiClient client.Client, trigger *runsv1alpha1.StoryTrigger) error { - if err := apiClient.Get(ctx, trigger.Spec.StoryRef.ToNamespacedName(trigger), &bubuv1alpha1.Story{}); err != nil { - if apierrors.IsNotFound(err) { - return updateStoryTriggerStatus(ctx, apiClient, trigger, runsv1alpha1.StoryTriggerDecisionRejected, "StoryNotFound", err.Error(), nil) - } - return err - } - - inputHash, err := runsidentity.ComputeTriggerInputHashFromRawExtension(trigger.Spec.Inputs) - if err != nil { - return updateStoryTriggerStatus(ctx, apiClient, trigger, runsv1alpha1.StoryTriggerDecisionRejected, "InputHashMismatch", err.Error(), nil) - } - if expected := trigger.Spec.DeliveryIdentity.InputHash; expected != "" && expected != inputHash { - return updateStoryTriggerStatus(ctx, apiClient, trigger, runsv1alpha1.StoryTriggerDecisionRejected, "InputHashMismatch", "spec.deliveryIdentity.inputHash does not match spec.inputs", nil) - } - - storyRun := desiredStoryRunForTrigger(trigger, inputHash) - decision := runsv1alpha1.StoryTriggerDecisionCreated - reason := "StoryRunCreated" - message := fmt.Sprintf("created StoryRun %s/%s", storyRun.Namespace, storyRun.Name) - - if err := apiClient.Create(ctx, storyRun); err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - - existing := &runsv1alpha1.StoryRun{} - if getErr := apiClient.Get(ctx, client.ObjectKeyFromObject(storyRun), existing); getErr != nil { - return getErr - } - if !storyRunMatchesTrigger(existing, trigger, inputHash) { - return updateStoryTriggerStatus(ctx, apiClient, trigger, runsv1alpha1.StoryTriggerDecisionRejected, "StoryRunConflict", fmt.Sprintf("existing StoryRun %s/%s does not match StoryTrigger storyRef and inputs", existing.Namespace, existing.Name), nil) - } - if storyRunOriginatesFromTrigger(existing, trigger) { - decision = runsv1alpha1.StoryTriggerDecisionCreated - reason = "StoryRunCreated" - message = fmt.Sprintf("recovered previously created StoryRun %s/%s", existing.Namespace, existing.Name) - } else { - decision = runsv1alpha1.StoryTriggerDecisionReused - reason = "StoryRunReused" - message = fmt.Sprintf("reused existing StoryRun %s/%s", existing.Namespace, existing.Name) - } - storyRun = existing - } - - return updateStoryTriggerStatus(ctx, apiClient, trigger, decision, reason, message, storyRun) -} - -func desiredStoryRunForTrigger(trigger *runsv1alpha1.StoryTrigger, inputHash string) *runsv1alpha1.StoryRun { - identity := runsidentity.StoryTriggerIdentity(trigger.Spec.DeliveryIdentity.Key, trigger.Spec.DeliveryIdentity.SubmissionID) - storyNamespace := trigger.Spec.StoryRef.ToNamespacedName(trigger).Namespace - - annotations := map[string]string{ - runsidentity.StoryRunTriggerRequestNameAnnotation: trigger.Name, - runsidentity.StoryRunTriggerRequestUIDAnnotation: string(trigger.GetUID()), - } - if key := trigger.Spec.DeliveryIdentity.Key; key != "" { - annotations[runsidentity.StoryRunTriggerTokenAnnotation] = key - annotations[runsidentity.StoryRunTriggerInputHashAnnotation] = inputHash - } - - var impulseRef *refs.ImpulseReference - if trigger.Spec.ImpulseRef != nil { - impulseRef = trigger.Spec.ImpulseRef.DeepCopy() - } - - var inputs *runtime.RawExtension - if trigger.Spec.Inputs != nil { - inputs = trigger.Spec.Inputs.DeepCopy() - } - - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: runsidentity.DeriveStoryRunName(storyNamespace, trigger.Spec.StoryRef.Name, identity), - Namespace: trigger.Namespace, - Annotations: annotations, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: *trigger.Spec.StoryRef.DeepCopy(), - ImpulseRef: impulseRef, - Inputs: inputs, - }, - } -} - -func storyRunMatchesTrigger(existing *runsv1alpha1.StoryRun, trigger *runsv1alpha1.StoryTrigger, triggerHash string) bool { - if existing == nil || trigger == nil { - return false - } - if existing.Spec.StoryRef.ToNamespacedName(existing) != trigger.Spec.StoryRef.ToNamespacedName(trigger) { - return false - } - if existing.Spec.StoryRef.Version != trigger.Spec.StoryRef.Version { - return false - } - existingHash, err := runsidentity.ComputeTriggerInputHashFromRawExtension(existing.Spec.Inputs) - if err != nil { - return false - } - return existingHash == triggerHash -} - -func storyRunOriginatesFromTrigger(existing *runsv1alpha1.StoryRun, trigger *runsv1alpha1.StoryTrigger) bool { - if existing == nil || trigger == nil { - return false - } - annotations := existing.GetAnnotations() - if len(annotations) == 0 { - return false - } - if annotations[runsidentity.StoryRunTriggerRequestNameAnnotation] != trigger.Name { - return false - } - uid := annotations[runsidentity.StoryRunTriggerRequestUIDAnnotation] - return uid != "" && uid == string(trigger.GetUID()) -} - -func updateStoryTriggerStatus( - ctx context.Context, - apiClient client.Client, - trigger *runsv1alpha1.StoryTrigger, - decision runsv1alpha1.StoryTriggerDecision, - reason string, - message string, - storyRun *runsv1alpha1.StoryRun, -) error { - current := &runsv1alpha1.StoryTrigger{} - if err := apiClient.Get(ctx, client.ObjectKeyFromObject(trigger), current); err != nil { - return err - } - now := metav1.Now() - current.Status.ObservedGeneration = current.Generation - if current.Status.AcceptedAt == nil { - current.Status.AcceptedAt = &now - } - current.Status.CompletedAt = &now - current.Status.Decision = decision - current.Status.Reason = reason - current.Status.Message = message - current.Status.StoryRunRef = storyRunRef(storyRun) - return apiClient.Status().Update(ctx, current) -} - -func storyRunRef(storyRun *runsv1alpha1.StoryRun) *refs.StoryRunReference { - if storyRun == nil { - return nil - } - ref := &refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: storyRun.Name}, - } - if storyRun.Namespace != "" { - namespace := storyRun.Namespace - ref.Namespace = &namespace - } - return ref -} - -func TestStorySubmissionSmoke(t *testing.T) { - h := setupStoryEnvtest(t) - story := createTestStory(t, h.apiClient, h.namespace, "smoke-story") - startStoryTriggerResolver(t, h.apiClient, h.namespace) - - run, err := h.sdkClient.TriggerStory(context.Background(), story.Name, h.namespace, map[string]any{"hello": "world"}) - require.NoError(t, err) - require.NotNil(t, run) - - fetched := &runsv1alpha1.StoryRun{} - require.NoError(t, h.apiClient.Get(context.Background(), client.ObjectKey{Name: run.Name, Namespace: h.namespace}, fetched)) - require.Equal(t, "smoke-story", fetched.Spec.StoryRef.Name) - require.NotNil(t, fetched.Spec.Inputs) - require.Contains(t, string(fetched.Spec.Inputs.Raw), "hello") - - triggers := &runsv1alpha1.StoryTriggerList{} - require.NoError(t, h.apiClient.List(context.Background(), triggers, client.InNamespace(h.namespace))) - require.Len(t, triggers.Items, 1) - require.Equal(t, runsv1alpha1.StoryTriggerDecisionCreated, triggers.Items[0].Status.Decision) - require.NotNil(t, triggers.Items[0].Status.StoryRunRef) - require.Equal(t, run.Name, triggers.Items[0].Status.StoryRunRef.Name) -} - -func TestStorySubmissionReusesStoryRunForSameTriggerToken(t *testing.T) { - h := setupStoryEnvtest(t) - story := createTestStory(t, h.apiClient, h.namespace, "smoke-story-dedupe") - startStoryTriggerResolver(t, h.apiClient, h.namespace) - - inputs := map[string]any{"hello": "world"} - token := "test-trigger-token" - triggerCtx := k8s.WithTriggerToken(context.Background(), token) - - first, err := h.sdkClient.TriggerStory(triggerCtx, story.Name, h.namespace, inputs) - require.NoError(t, err) - require.NotNil(t, first) - require.NotEmpty(t, first.Name) - - second, err := h.sdkClient.TriggerStory(triggerCtx, story.Name, h.namespace, inputs) - require.NoError(t, err) - require.NotNil(t, second) - require.Equal(t, first.Name, second.Name) - - list := &runsv1alpha1.StoryRunList{} - require.NoError(t, h.apiClient.List(context.Background(), list, client.InNamespace(h.namespace))) - require.Len(t, list.Items, 1) - require.Equal(t, first.Name, list.Items[0].Name) - - triggers := &runsv1alpha1.StoryTriggerList{} - require.NoError(t, h.apiClient.List(context.Background(), triggers, client.InNamespace(h.namespace))) - require.Len(t, triggers.Items, 1) - require.Equal(t, runsv1alpha1.StoryTriggerDecisionCreated, triggers.Items[0].Status.Decision) -} - -func TestStorySubmissionRejectsTokenReuseWithDifferentInputs(t *testing.T) { - h := setupStoryEnvtest(t) - story := createTestStory(t, h.apiClient, h.namespace, "smoke-story-token-mismatch") - startStoryTriggerResolver(t, h.apiClient, h.namespace) - - token := "test-trigger-token-mismatch" - triggerCtx := k8s.WithTriggerToken(context.Background(), token) - - first, err := h.sdkClient.TriggerStory(triggerCtx, story.Name, h.namespace, map[string]any{"hello": "world"}) - require.NoError(t, err) - require.NotNil(t, first) - - second, err := h.sdkClient.TriggerStory(triggerCtx, story.Name, h.namespace, map[string]any{"hello": "different"}) - require.Error(t, err) - require.Nil(t, second) - require.Contains(t, err.Error(), "different immutable request identity") - - list := &runsv1alpha1.StoryRunList{} - require.NoError(t, h.apiClient.List(context.Background(), list, client.InNamespace(h.namespace))) - require.Len(t, list.Items, 1) - require.Equal(t, first.Name, list.Items[0].Name) -} - -func TestStorySubmissionReusesPreexistingTokenStoryRun(t *testing.T) { - h := setupStoryEnvtest(t) - story := createTestStory(t, h.apiClient, h.namespace, "smoke-story-preexisting") - startStoryTriggerResolver(t, h.apiClient, h.namespace) - - token := "test-trigger-token-preexisting" - inputs := map[string]any{"hello": "world"} - inputBytes, err := json.Marshal(inputs) - require.NoError(t, err) - - preexistingName := runsidentity.DeriveStoryRunName(h.namespace, story.Name, token) - storyRefNamespace := h.namespace - preexisting := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: preexistingName, - Namespace: h.namespace, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{ - Name: story.Name, - Namespace: &storyRefNamespace, - }, - }, - Inputs: &runtime.RawExtension{Raw: inputBytes}, - }, - } - require.NoError(t, h.apiClient.Create(context.Background(), preexisting)) - - triggerCtx := k8s.WithTriggerToken(context.Background(), token) - got, err := h.sdkClient.TriggerStory(triggerCtx, story.Name, h.namespace, inputs) - require.NoError(t, err) - require.NotNil(t, got) - require.Equal(t, preexistingName, got.Name) - - list := &runsv1alpha1.StoryRunList{} - require.NoError(t, h.apiClient.List(context.Background(), list, client.InNamespace(h.namespace))) - require.Len(t, list.Items, 1) - require.Equal(t, preexistingName, list.Items[0].Name) - - triggers := &runsv1alpha1.StoryTriggerList{} - require.NoError(t, h.apiClient.List(context.Background(), triggers, client.InNamespace(h.namespace))) - require.Len(t, triggers.Items, 1) - require.Equal(t, runsv1alpha1.StoryTriggerDecisionReused, triggers.Items[0].Status.Decision) -} - -func TestStorySubmissionRejectsPreexistingTokenStoryRunWithDifferentInputs(t *testing.T) { - h := setupStoryEnvtest(t) - story := createTestStory(t, h.apiClient, h.namespace, "smoke-story-preexisting-mismatch") - startStoryTriggerResolver(t, h.apiClient, h.namespace) - - token := "test-trigger-token-preexisting-mismatch" - existingInputs := map[string]any{"hello": "world"} - existingInputBytes, err := json.Marshal(existingInputs) - require.NoError(t, err) - - preexistingName := runsidentity.DeriveStoryRunName(h.namespace, story.Name, token) - storyRefNamespace := h.namespace - preexisting := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: preexistingName, - Namespace: h.namespace, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{ - Name: story.Name, - Namespace: &storyRefNamespace, - }, - }, - Inputs: &runtime.RawExtension{Raw: existingInputBytes}, - }, - } - require.NoError(t, h.apiClient.Create(context.Background(), preexisting)) - - triggerCtx := k8s.WithTriggerToken(context.Background(), token) - got, err := h.sdkClient.TriggerStory(triggerCtx, story.Name, h.namespace, map[string]any{"hello": "different"}) - require.Error(t, err) - require.Nil(t, got) - require.Contains(t, err.Error(), "does not match StoryTrigger storyRef and inputs") - - list := &runsv1alpha1.StoryRunList{} - require.NoError(t, h.apiClient.List(context.Background(), list, client.InNamespace(h.namespace))) - require.Len(t, list.Items, 1) - require.Equal(t, preexistingName, list.Items[0].Name) - - triggers := &runsv1alpha1.StoryTriggerList{} - require.NoError(t, h.apiClient.List(context.Background(), triggers, client.InNamespace(h.namespace))) - require.Len(t, triggers.Items, 1) - require.Equal(t, runsv1alpha1.StoryTriggerDecisionRejected, triggers.Items[0].Status.Decision) -} - -func TestStorySubmissionPendingTimeoutReturnsRetryable(t *testing.T) { - t.Setenv(contracts.K8sOperationTimeoutEnv, "150ms") - - h := setupStoryEnvtest(t) - story := createTestStory(t, h.apiClient, h.namespace, "smoke-story-pending") - - got, err := h.sdkClient.TriggerStory(context.Background(), story.Name, h.namespace, map[string]any{"hello": "world"}) - require.Error(t, err) - require.Nil(t, got) - require.ErrorIs(t, err, sdkerrors.ErrRetryable) - require.Contains(t, err.Error(), "still pending") - - triggers := &runsv1alpha1.StoryTriggerList{} - require.NoError(t, h.apiClient.List(context.Background(), triggers, client.InNamespace(h.namespace))) - require.Len(t, triggers.Items, 1) - require.Empty(t, triggers.Items[0].Status.Decision) -} diff --git a/integration/envtest/storyrun_stop_smoke_test.go b/integration/envtest/storyrun_stop_smoke_test.go deleted file mode 100644 index 6a1882a..0000000 --- a/integration/envtest/storyrun_stop_smoke_test.go +++ /dev/null @@ -1,96 +0,0 @@ -//go:build integration - -package envtest - -import ( - "context" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/refs" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func TestStopStoryRun_RequestsGracefulCancelLive(t *testing.T) { - h := setupSDKEnvtest(t, runsv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - storyRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "stop-storyrun-running-live", - Namespace: namespace, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: "story-stop-live"}, - }, - }, - } - require.NoError(t, apiClient.Create(context.Background(), storyRun)) - - startedAt := metav1.NewTime(time.Now().UTC().Add(-2 * time.Second)) - statusObj := storyRun.DeepCopy() - statusObj.Status.Phase = enums.PhaseRunning - statusObj.Status.StartedAt = &startedAt - require.NoError(t, apiClient.Status().Update(context.Background(), statusObj)) - - require.NoError(t, sdkClient.StopStoryRun(context.Background(), storyRun.Name, namespace)) - - updated := &runsv1alpha1.StoryRun{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: storyRun.Name, Namespace: namespace}, - updated, - )) - - require.Equal(t, enums.PhaseRunning, updated.Status.Phase) - require.Empty(t, updated.Status.Message) - require.Nil(t, updated.Status.FinishedAt) - require.NotNil(t, updated.Status.StartedAt) - require.Empty(t, updated.Status.Duration) - require.NotNil(t, updated.Spec.CancelRequested) - require.True(t, *updated.Spec.CancelRequested) -} - -func TestStopStoryRun_RequestsGracefulCancelForPausedRunLive(t *testing.T) { - h := setupSDKEnvtest(t, runsv1alpha1.AddToScheme) - apiClient := h.apiClient - sdkClient := h.sdkClient - namespace := h.namespace - storyRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "stop-storyrun-paused-live", - Namespace: namespace, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: "story-stop-live-paused"}, - }, - }, - } - require.NoError(t, apiClient.Create(context.Background(), storyRun)) - - statusObj := storyRun.DeepCopy() - statusObj.Status.Phase = enums.PhasePaused - statusObj.Status.Message = "paused by controller" - require.NoError(t, apiClient.Status().Update(context.Background(), statusObj)) - - require.NoError(t, sdkClient.StopStoryRun(context.Background(), storyRun.Name, namespace)) - - updated := &runsv1alpha1.StoryRun{} - require.NoError(t, apiClient.Get( - context.Background(), - client.ObjectKey{Name: storyRun.Name, Namespace: namespace}, - updated, - )) - require.Equal(t, enums.PhasePaused, updated.Status.Phase) - require.Equal(t, "paused by controller", updated.Status.Message) - require.Nil(t, updated.Status.FinishedAt) - require.NotNil(t, updated.Spec.CancelRequested) - require.True(t, *updated.Spec.CancelRequested) -} diff --git a/integration/envtest/timing_test.go b/integration/envtest/timing_test.go deleted file mode 100644 index 60b0815..0000000 --- a/integration/envtest/timing_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package envtest - -import ( - "testing" - "time" -) - -func TestParsePositiveDurationEnv(t *testing.T) { - const key = "BUBU_ENVTEST_DURATION_TEST" - const fallback = 250 * time.Millisecond - - t.Run("missing uses fallback", func(t *testing.T) { - t.Setenv(key, "") - got := parsePositiveDurationEnv(key, fallback) - if got != fallback { - t.Fatalf("expected fallback %s, got %s", fallback, got) - } - }) - - t.Run("invalid uses fallback", func(t *testing.T) { - t.Setenv(key, "not-a-duration") - got := parsePositiveDurationEnv(key, fallback) - if got != fallback { - t.Fatalf("expected fallback %s, got %s", fallback, got) - } - }) - - t.Run("non-positive uses fallback", func(t *testing.T) { - t.Setenv(key, "0s") - got := parsePositiveDurationEnv(key, fallback) - if got != fallback { - t.Fatalf("expected fallback %s, got %s", fallback, got) - } - - t.Setenv(key, "-1s") - got = parsePositiveDurationEnv(key, fallback) - if got != fallback { - t.Fatalf("expected fallback %s, got %s", fallback, got) - } - }) - - t.Run("valid positive duration overrides fallback", func(t *testing.T) { - t.Setenv(key, "125ms") - got := parsePositiveDurationEnv(key, fallback) - if got != 125*time.Millisecond { - t.Fatalf("expected 125ms, got %s", got) - } - }) -} - -func TestWaitForSignal(t *testing.T) { - t.Run("returns true when signal arrives", func(t *testing.T) { - done := make(chan struct{}) - go func() { - time.Sleep(5 * time.Millisecond) - close(done) - }() - - if ok := waitForSignal(done, 200*time.Millisecond); !ok { - t.Fatalf("expected waitForSignal to return true") - } - }) - - t.Run("returns false on timeout", func(t *testing.T) { - done := make(chan struct{}) - if ok := waitForSignal(done, 10*time.Millisecond); ok { - t.Fatalf("expected waitForSignal to return false") - } - }) -} diff --git a/k8s/client.go b/k8s/client.go index f2c09d8..bd26f9e 100644 --- a/k8s/client.go +++ b/k8s/client.go @@ -1,55 +1,19 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package k8s import ( - "bytes" "context" - "crypto/sha256" - "encoding/hex" "encoding/json" - "errors" "fmt" - "log/slog" - "math/rand" - "net/url" "os" - "sort" "strconv" - "strings" - "sync" - "syscall" "time" - catalogv1alpha1 "github.com/bubustack/bobrapet/api/catalog/v1alpha1" runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - transportv1alpha1 "github.com/bubustack/bobrapet/api/transport/v1alpha1" - bubuv1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" "github.com/bubustack/bobrapet/pkg/enums" - runsidentity "github.com/bubustack/bobrapet/pkg/runs/identity" - "github.com/bubustack/bobrapet/pkg/storage" - "github.com/bubustack/bubu-sdk-go/pkg/env" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" + "github.com/bubustack/bubu-sdk-go/pkg/kube/apply" "github.com/bubustack/bubu-sdk-go/pkg/metrics" - "github.com/bubustack/core/contracts" - coreidentity "github.com/bubustack/core/runtime/identity" + "github.com/bubustack/bubu-sdk-go/storage" "github.com/google/uuid" - coordinationv1 "k8s.io/api/coordination/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -60,79 +24,15 @@ import ( "github.com/bubustack/bobrapet/pkg/refs" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/retry" -) - -const ( - metadataNameMaxLength = 253 - maxSignalEvents = 256 - maxEffectRecords = 256 ) -type triggerTokenContextKey struct{} - var ( scheme = runtime.NewScheme() - - // sharedClientMu protects the lazy singleton returned by SharedClient. - sharedClientMu sync.Mutex - sharedClientVal *Client ) -// SharedClient returns a process-wide singleton Kubernetes client. Unlike NewClient, -// it does not allocate a new HTTP transport on every call. Initialization is retried -// on each call until it succeeds, so transient API-server unavailability does not -// permanently break the SDK. -func SharedClient() (*Client, error) { - sharedClientMu.Lock() - defer sharedClientMu.Unlock() - if sharedClientVal != nil { - return sharedClientVal, nil - } - c, err := NewClient() - if err != nil { - return nil, err - } - sharedClientVal = c - return sharedClientVal, nil -} - func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(coordinationv1.AddToScheme(scheme)) utilruntime.Must(runsv1alpha1.AddToScheme(scheme)) - utilruntime.Must(transportv1alpha1.AddToScheme(scheme)) - utilruntime.Must(bubuv1alpha1.AddToScheme(scheme)) - utilruntime.Must(catalogv1alpha1.AddToScheme(scheme)) -} - -// WithTriggerToken attaches an idempotency token used by TriggerStory to derive -// a stable trigger-delivery identity for StoryTrigger requests. -// Nil contexts are accepted; when a non-empty token is provided, the helper -// falls back to context.Background() so token attachment stays panic-free. -func WithTriggerToken(ctx context.Context, token string) context.Context { - if token == "" { - return ctx - } - if ctx == nil { - ctx = context.Background() - } - return context.WithValue(ctx, triggerTokenContextKey{}, token) -} - -func triggerTokenFromContext(ctx context.Context) string { - if ctx == nil { - return "" - } - if v, ok := ctx.Value(triggerTokenContextKey{}).(string); ok { - return v - } - return "" -} - -// TriggerTokenFromContext returns the trigger token stored in the context, if any. -func TriggerTokenFromContext(ctx context.Context) string { - return triggerTokenFromContext(ctx) } // Client is a wrapper around the controller-runtime Kubernetes client that provides @@ -146,7 +46,7 @@ type Client struct { func NewClient() (*Client, error) { config, err := GetConfig() if err != nil { - return nil, fmt.Errorf("failed to create kubernetes client: %w", err) + return nil, err } kubeClient, err := client.New(config, client.Options{ Scheme: scheme, @@ -175,13 +75,6 @@ func GetConfig() (*rest.Config, error) { return config, nil } -func resolveUserAgent() string { - if v := os.Getenv(contracts.K8sUserAgentEnv); v != "" { - return v - } - return "bubu-sdk-go" -} - // buildBaseRestConfig returns in-cluster config or falls back to KUBECONFIG. func buildBaseRestConfig() (*rest.Config, error) { if cfg, err := rest.InClusterConfig(); err == nil { @@ -218,34 +111,31 @@ func applyDefaultRestConfigSettings(config *rest.Config) { } } -// applyEnvOverridesToRestConfig applies env var overrides for timeout and user-agent. -// QPS and Burst defaults are set by applyDefaultRestConfigSettings. +// applyEnvOverridesToRestConfig applies env var overrides for QPS/Burst/Timeout. func applyEnvOverridesToRestConfig(config *rest.Config) { - if v := os.Getenv(contracts.K8sTimeoutEnv); v != "" { + if v := os.Getenv("BUBU_K8S_QPS"); v != "" { + if f, err := strconv.ParseFloat(v, 32); err == nil && f > 0 { + config.QPS = float32(f) + } + } + if v := os.Getenv("BUBU_K8S_BURST"); v != "" { + if i, err := strconv.Atoi(v); err == nil && i > 0 { + config.Burst = i + } + } + if v := os.Getenv("BUBU_K8S_TIMEOUT"); v != "" { if d, err := time.ParseDuration(v); err == nil && d > 0 { config.Timeout = d } } - // User-Agent for all requests from this client. - config.UserAgent = "bubu-sdk-go" - if v := os.Getenv(contracts.K8sUserAgentEnv); v != "" { - config.UserAgent = v - } } -// NewClientForConfig creates a new Kubernetes client for the given config. -func NewClientForConfig(config *rest.Config) (*Client, error) { - kubeClient, err := client.New(config, client.Options{ - Scheme: scheme, - }) - if err != nil { - return nil, fmt.Errorf("failed to create kubernetes client: %w", err) +// resolveUserAgent computes the User-Agent to use. +func resolveUserAgent() string { + if v := os.Getenv("BUBU_K8S_USER_AGENT"); v != "" { + return v } - - return &Client{ - Client: kubeClient, - namespace: getPodNamespace(), - }, nil + return "bubu-sdk-go" } // GetNamespace returns the Kubernetes namespace that the client is configured to use. @@ -255,26 +145,22 @@ func (c *Client) GetNamespace() string { // getOperationTimeout returns the timeout for K8s operations from env or default func getOperationTimeout() time.Duration { - return env.GetDuration(contracts.K8sOperationTimeoutEnv, 30*time.Second) + if v := os.Getenv("BUBU_K8S_OPERATION_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return 30 * time.Second // Default: 30s } -// getMaxPatchRetries returns the configured number of retries after the initial -// patch attempt, sourced from env or the default. +// getMaxPatchRetries returns the max retries for conflict-retry from env or default func getMaxPatchRetries() int { - raw := strings.TrimSpace(os.Getenv(contracts.K8sPatchMaxRetriesEnv)) - if raw == "" { - return 5 - } - retries, err := strconv.Atoi(raw) - if err != nil { - slog.Warn("ignoring invalid env var integer, using default", - "key", contracts.K8sPatchMaxRetriesEnv, "value", raw, "default", 5) - return 5 - } - if retries < 0 { - return 0 + if v := os.Getenv("BUBU_K8S_PATCH_MAX_RETRIES"); v != "" { + if i, err := strconv.Atoi(v); err == nil && i > 0 { + return i + } } - return retries + return 5 // Default: 5 retries } // isValidPhaseTransition checks if a phase transition is legal according to the StepRun state machine. @@ -310,15 +196,12 @@ func isValidPhaseTransition(from, to enums.Phase) bool { enums.PhaseRunning: true, enums.PhaseFailed: true, enums.PhaseCanceled: true, - enums.PhaseTimeout: true, // timeout before pod starts }, enums.PhaseRunning: { enums.PhaseSucceeded: true, enums.PhaseFailed: true, enums.PhaseCanceled: true, enums.PhasePaused: true, - enums.PhaseTimeout: true, - enums.PhaseAborted: true, // force-kill / saga abort }, enums.PhasePaused: { enums.PhaseRunning: true, @@ -326,8 +209,8 @@ func isValidPhaseTransition(from, to enums.Phase) bool { enums.PhaseFailed: true, }, enums.PhaseFailed: { - enums.PhaseRunning: true, // Controller retry: Failed → Running - enums.PhaseSucceeded: true, // Late success: SDK completed work after controller set Failed + // Allow retry: Failed → Running + enums.PhaseRunning: true, }, enums.PhaseSucceeded: { // Terminal state: no transitions allowed @@ -341,27 +224,19 @@ func isValidPhaseTransition(from, to enums.Phase) bool { enums.PhaseAborted: { // Terminal state: no transitions allowed }, - enums.PhaseFinished: { - // Terminal state: no transitions allowed - }, - enums.PhaseSkipped: { - // Terminal state: no transitions allowed - }, enums.PhaseCompensated: { // Terminal state: no transitions allowed }, enums.PhaseBlocked: { // Non-terminal: can transition when dependencies become available - enums.PhasePending: true, - enums.PhaseRunning: true, - enums.PhaseFailed: true, - enums.PhaseCanceled: true, // canceled while blocked + enums.PhasePending: true, + enums.PhaseRunning: true, + enums.PhaseFailed: true, }, enums.PhaseScheduling: { // Non-terminal: transitions when pod is scheduled or fails to schedule - enums.PhaseRunning: true, - enums.PhaseFailed: true, - enums.PhaseCanceled: true, // canceled while scheduling + enums.PhaseRunning: true, + enums.PhaseFailed: true, }, } @@ -383,8 +258,7 @@ func validateStatusTransition(existing *runsv1alpha1.StepRunStatus, incoming *ru } if !isValidPhaseTransition(existing.Phase, incoming.Phase) { - return fmt.Errorf("%w: %s → %s (not allowed by state machine)", - sdkerrors.ErrInvalidTransition, existing.Phase, incoming.Phase) + return fmt.Errorf("invalid phase transition: %s → %s (not allowed by state machine)", existing.Phase, incoming.Phase) } return nil } @@ -409,8 +283,6 @@ func mergeStepRunStatus( mergeRetryFields(&merged, incoming) mergePayloads(&merged, incoming) mergeNeeds(&merged, incoming) - mergeSignalEvents(&merged, incoming) - mergeEffects(&merged, incoming) return merged, nil } @@ -433,10 +305,7 @@ func mergeTimingFields(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha } func mergeProcessDetails(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha1.StepRunStatus) { - // Only apply ExitCode when the incoming patch explicitly sets a non-zero value. - // This prevents logs-only or signal-only partial patches from resetting a - // previously recorded non-zero exit code back to 0. - if incoming.ExitCode != 0 && incoming.ExitCode != merged.ExitCode { + if incoming.ExitCode != merged.ExitCode { merged.ExitCode = incoming.ExitCode } if incoming.ExitClass != "" { @@ -460,655 +329,120 @@ func mergeRetryFields(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha1 } func mergePayloads(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha1.StepRunStatus) { - if incoming == nil { - return - } - // Only overwrite when the incoming patch actually provides the field. - // Signal/Log/Effect patches send status with only their fields set; copying - // nil Output/Logs/Error would wipe existing data (e.g. result output). if incoming.Output != nil { merged.Output = incoming.Output } - if incoming.Logs != nil { - merged.Logs = incoming.Logs - } if incoming.Error != nil { merged.Error = incoming.Error } } func mergeNeeds(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha1.StepRunStatus) { - if incoming == nil { - return - } - // Preserve controller-managed dependency state unless the caller explicitly - // sets Needs (including an explicit empty slice to clear it). - if incoming.Needs != nil { + if len(incoming.Needs) > 0 { merged.Needs = incoming.Needs } - if len(incoming.Signals) > 0 { - if merged.Signals == nil { - merged.Signals = make(map[string]runtime.RawExtension, len(incoming.Signals)) - } - for key, raw := range incoming.Signals { - if len(raw.Raw) == 0 { - delete(merged.Signals, key) - continue - } - merged.Signals[key] = raw - } - } -} - -func mergeSignalEvents(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha1.StepRunStatus) { - if incoming == nil || len(incoming.SignalEvents) == 0 { - return - } - existing := make(map[uint64]struct{}, len(merged.SignalEvents)) - var maxSeq uint64 - for _, evt := range merged.SignalEvents { - existing[evt.Seq] = struct{}{} - if evt.Seq > maxSeq { - maxSeq = evt.Seq - } - } - for _, evt := range incoming.SignalEvents { - if evt.Seq == 0 { - maxSeq++ - evt.Seq = maxSeq - } else if evt.Seq > maxSeq { - maxSeq = evt.Seq - } - if _, ok := existing[evt.Seq]; ok { - continue - } - merged.SignalEvents = append(merged.SignalEvents, evt) - existing[evt.Seq] = struct{}{} - } - if len(merged.SignalEvents) > maxSignalEvents { - sort.Slice(merged.SignalEvents, func(i, j int) bool { return merged.SignalEvents[i].Seq < merged.SignalEvents[j].Seq }) //nolint:lll - merged.SignalEvents = merged.SignalEvents[len(merged.SignalEvents)-maxSignalEvents:] - } -} - -func mergeEffects(merged *runsv1alpha1.StepRunStatus, incoming *runsv1alpha1.StepRunStatus) { - if incoming == nil || len(incoming.Effects) == 0 { - return - } - existing := make(map[uint64]struct{}, len(merged.Effects)) - existingKeys := make(map[string]struct{}, len(merged.Effects)) - var maxSeq uint64 - for _, evt := range merged.Effects { - existing[evt.Seq] = struct{}{} - if key := strings.TrimSpace(evt.Key); key != "" { - existingKeys[key] = struct{}{} - } - if evt.Seq > maxSeq { - maxSeq = evt.Seq - } - } - for _, evt := range incoming.Effects { - key := strings.TrimSpace(evt.Key) - if evt.Seq == 0 && key != "" { - if _, ok := existingKeys[key]; ok { - continue - } - } - if evt.Seq == 0 { - maxSeq++ - evt.Seq = maxSeq - } else if evt.Seq > maxSeq { - maxSeq = evt.Seq - } - if _, ok := existing[evt.Seq]; ok { - continue - } - merged.Effects = append(merged.Effects, evt) - existing[evt.Seq] = struct{}{} - if key != "" { - existingKeys[key] = struct{}{} - } - } - if len(merged.Effects) > maxEffectRecords { - sort.Slice(merged.Effects, func(i, j int) bool { return merged.Effects[i].Seq < merged.Effects[j].Seq }) - merged.Effects = merged.Effects[len(merged.Effects)-maxEffectRecords:] - } -} - -// deriveStorageInputKey produces a deterministic storage key for Story inputs. -// When a trigger token is provided, the key is augmented with a content fingerprint -// so retries with different payloads cannot overwrite previously offloaded data. -func deriveStorageInputKey(storyNamespace, storyName, token, inputHash string) string { - if token == "" { - return uuid.New().String() - } - - base := deriveStoryRunName(storyNamespace, storyName, token) - if inputHash == "" { - return base - } - - suffix := inputHash - if len(suffix) > 12 { - suffix = suffix[:12] - } - return appendSuffixWithLimit(base, suffix) -} - -func deriveStoryRunName(storyNamespace, storyName, token string) string { - return runsidentity.DeriveStoryRunName(storyNamespace, storyName, token) -} - -func computeInputFingerprint(inputs map[string]any) string { - if len(inputs) == 0 { - return "" - } - payload, err := json.Marshal(inputs) - if err != nil { - return "" - } - sum := sha256.Sum256(payload) - return hex.EncodeToString(sum[:]) -} - -func appendSuffixWithLimit(base, suffix string) string { - if suffix == "" { - return base - } - joined := fmt.Sprintf("%s-%s", base, suffix) - if len(joined) <= metadataNameMaxLength { - return joined - } - maxPrefix := max(metadataNameMaxLength-1-len(suffix), 1) - prefix := base - if len(prefix) > maxPrefix { - prefix = prefix[:maxPrefix] - prefix = strings.Trim(prefix, "-") - if prefix == "" { - prefix = suffix[:1] - } - } - return fmt.Sprintf("%s-%s", prefix, suffix) } -func sameNamespace(a, b *string) bool { - if a == nil && b == nil { - return true - } - if a == nil || b == nil { - return false - } - return *a == *b -} - -func normalizeInputs(raw *runtime.RawExtension) []byte { - if raw == nil || len(raw.Raw) == 0 { - return nil - } - return bytes.TrimSpace(raw.Raw) -} - -const storyTriggerPollInterval = 50 * time.Millisecond - -// TriggerStory submits a StoryTrigger request for the configured Story and waits -// for the controller to resolve it into a StoryRun. -// Callers need RBAC for `storytriggers` `create`/`get` and `storyruns` `get`. +// TriggerStory creates a new StoryRun for the configured story with the provided inputs. func (c *Client) TriggerStory( - ctx context.Context, storyName string, storyNamespace string, inputs map[string]any, + ctx context.Context, storyName string, inputs map[string]any, ) (*runsv1alpha1.StoryRun, error) { - if ctx == nil { - return nil, fmt.Errorf("context must not be nil") - } - policy, err := loadTriggerDeliveryPolicyFromEnv() - if err != nil { - return nil, err - } - throttle, err := getTriggerThrottle() - if err != nil { - return nil, err - } - if throttle != nil { - waited, release, acquireErr := throttle.Acquire(ctx) - if acquireErr != nil { - return nil, acquireErr - } - if release != nil { - defer release() - } - if waited { - c.recordTriggerThrottle(ctx) - } - } - token := triggerTokenFromContext(ctx) - if token == "" { - token = strings.TrimSpace(os.Getenv(contracts.TriggerTokenEnv)) - } - resolvedToken, err := resolveTriggerTokenForPolicy(ctx, c.namespace, storyName, storyNamespace, inputs, policy, token) - if err != nil { - return nil, err - } - if resolvedToken != "" { - ctx = WithTriggerToken(ctx, resolvedToken) - } - request, err := c.buildStoryTriggerRequest(ctx, storyName, storyNamespace, inputs, resolvedToken, policy) - if err != nil { - return nil, err - } - var retryPolicy *triggerRetryPolicy - if policy != nil { - retryPolicy = policy.retry - } - return retryTriggerStory(ctx, retryPolicy, func(callCtx context.Context) (*runsv1alpha1.StoryRun, error) { - return c.triggerStoryOnce(callCtx, request) - }) -} - -func (c *Client) buildStoryTriggerRequest( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - resolvedToken string, - policy *triggerDeliveryPolicy, -) (*runsv1alpha1.StoryTrigger, error) { - if ctx == nil { - return nil, fmt.Errorf("context must not be nil") - } - if inputs == nil { - inputs = map[string]any{} - } - - apiCtx, cancel := context.WithTimeout(ctx, getOperationTimeout()) + // Set timeout for this operation + ctx, cancel := context.WithTimeout(ctx, getOperationTimeout()) defer cancel() - targetNamespace := strings.TrimSpace(storyNamespace) - if targetNamespace == "" { - targetNamespace = c.namespace - } - - storyVersion := c.resolveStoryVersion(apiCtx, targetNamespace, storyName) - storageCtx := ctx - if schemaID := storyInputSchemaID(targetNamespace, storyName); schemaID != "" || storyVersion != "" { - storageCtx = storage.WithStorageSchema(storageCtx, schemaID, storyVersion) - } - - dedupeMode, dedupeKey := resolveTriggerRequestIdentity(policy, resolvedToken) - submissionID := deriveStoryTriggerSubmissionID(dedupeKey) - storageIdentity := dedupeKey - if storageIdentity == "" { - storageIdentity = submissionID - } + start := time.Now() - finalInputs, err := c.offloadInputsIfNecessary(storageCtx, targetNamespace, storyName, storageIdentity, inputs) + // Offload inputs if they are too large, replacing them with a storage reference. + finalInputs, err := c.offloadInputsIfNecessary(ctx, storyName, inputs) if err != nil { + metrics.RecordK8sOperation(ctx, "TriggerStory", time.Since(start).Seconds(), false) return nil, fmt.Errorf("failed to offload inputs: %w", err) } inputBytes, err := json.Marshal(finalInputs) if err != nil { + metrics.RecordK8sOperation(ctx, "TriggerStory", time.Since(start).Seconds(), false) return nil, fmt.Errorf("failed to marshal inputs: %w", err) } - inputHash, err := runsidentity.ComputeTriggerInputHash(inputBytes) - if err != nil { - return nil, fmt.Errorf("failed to compute trigger input hash: %w", err) - } - storyRef := refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: storyName}, - } - if storyVersion != "" { - storyRef.Version = storyVersion - } - if storyNamespace != "" { - ns := storyNamespace - storyRef.Namespace = &ns + // Idempotent creation via Server-Side Apply with deterministic name when BUBU_TRIGGER_TOKEN is set. + // If no token is provided, fall back to GenerateName for unique creation semantics. + token := os.Getenv("BUBU_TRIGGER_TOKEN") + if token == "" { + storyRun := &runsv1alpha1.StoryRun{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: c.namespace, + GenerateName: fmt.Sprintf("%s-", storyName), + }, + Spec: runsv1alpha1.StoryRunSpec{ + StoryRef: refs.StoryReference{ + ObjectReference: refs.ObjectReference{ + Name: storyName, + }, + }, + Inputs: &runtime.RawExtension{Raw: inputBytes}, + }, + } + if err := c.Create(ctx, storyRun); err != nil { + metrics.RecordK8sOperation(ctx, "TriggerStory", time.Since(start).Seconds(), false) + return nil, fmt.Errorf("failed to create storyrun: %w", err) + } + metrics.RecordK8sOperation(ctx, "TriggerStory", time.Since(start).Seconds(), true) + return storyRun, nil } - labels, annotations := parentMetadataFromEnv() - request := &runsv1alpha1.StoryTrigger{ + // Derive deterministic name from story + token + // Token is expected to be unique for the deduplicated trigger instance + name := fmt.Sprintf("%s-%s", storyName, token) + applyObj := &runsv1alpha1.StoryRun{ ObjectMeta: metav1.ObjectMeta{ - Name: runsidentity.DeriveStoryTriggerName(targetNamespace, storyName, dedupeKey, submissionID), - Namespace: c.namespace, - Labels: labels, - Annotations: annotations, + Name: name, + Namespace: c.namespace, }, - Spec: runsv1alpha1.StoryTriggerSpec{ - StoryRef: storyRef, - ImpulseRef: resolveImpulseRefFromEnv(), - Inputs: &runtime.RawExtension{Raw: inputBytes}, - DeliveryIdentity: runsv1alpha1.TriggerDeliveryIdentity{ - Mode: &dedupeMode, - SubmissionID: submissionID, + Spec: runsv1alpha1.StoryRunSpec{ + StoryRef: refs.StoryReference{ + ObjectReference: refs.ObjectReference{ + Name: storyName, + }, }, + Inputs: &runtime.RawExtension{Raw: inputBytes}, }, } - if dedupeKey != "" { - request.Spec.DeliveryIdentity.Key = dedupeKey - request.Spec.DeliveryIdentity.InputHash = inputHash - } - request.SetGroupVersionKind(runsv1alpha1.GroupVersion.WithKind("StoryTrigger")) - return request, nil -} - -func resolveTriggerRequestIdentity(policy *triggerDeliveryPolicy, resolvedToken string) (bubuv1alpha1.TriggerDedupeMode, string) { //nolint:lll - mode := bubuv1alpha1.TriggerDedupeNone - key := strings.TrimSpace(resolvedToken) - - if key != "" { - mode = bubuv1alpha1.TriggerDedupeToken - } - if policy == nil || policy.dedupe == nil { - return mode, key - } - - switch policy.dedupe.mode { - case "key": //nolint:goconst - return bubuv1alpha1.TriggerDedupeKey, key - case "token": //nolint:goconst - return bubuv1alpha1.TriggerDedupeToken, key - case "none", "": //nolint:goconst - if key != "" { - return bubuv1alpha1.TriggerDedupeToken, key - } - return bubuv1alpha1.TriggerDedupeNone, "" - default: - return mode, key - } -} - -func deriveStoryTriggerSubmissionID(dedupeKey string) string { - if strings.TrimSpace(dedupeKey) != "" { - return dedupeKey - } - return uuid.New().String() -} - -func (c *Client) triggerStoryOnce(ctx context.Context, request *runsv1alpha1.StoryTrigger) (*runsv1alpha1.StoryRun, error) { //nolint:lll - if ctx == nil { - return nil, fmt.Errorf("context must not be nil") - } - if request == nil { - return nil, fmt.Errorf("story trigger request must not be nil") - } - - start := time.Now() - createCtx, cancel := context.WithTimeout(ctx, getOperationTimeout()) - defer cancel() - - current := request.DeepCopy() - if err := c.Create(createCtx, current); err != nil { - if !apierrors.IsAlreadyExists(err) { - metrics.RecordK8sOperation(createCtx, "TriggerStory", time.Since(start).Seconds(), false) - return nil, wrapK8sError(err, "failed to create storytrigger '%s' in namespace '%s'", request.Name, request.Namespace) //nolint:lll - } - - existing := &runsv1alpha1.StoryTrigger{} - if getErr := c.Get(createCtx, types.NamespacedName{Name: request.Name, Namespace: request.Namespace}, existing); getErr != nil { //nolint:lll - metrics.RecordK8sOperation(createCtx, "TriggerStory", time.Since(start).Seconds(), false) - return nil, wrapK8sError(getErr, "storytrigger '%s' already exists but could not be retrieved", request.Name) - } - if !storyTriggerRequestMatches(existing, request) { - metrics.RecordK8sOperation(createCtx, "TriggerStory", time.Since(start).Seconds(), false) - return nil, fmt.Errorf( - "storytrigger '%s' already exists with different immutable request identity; reuse is not allowed", - request.Name, - ) - } - current = existing - } - - storyRun, err := c.waitForStoryTriggerResolution(ctx, current) - if err != nil { + applyObj.SetGroupVersionKind(runsv1alpha1.GroupVersion.WithKind("StoryRun")) + force := true + if err := apply.Apply(ctx, c.Client, applyObj, force); err != nil { metrics.RecordK8sOperation(ctx, "TriggerStory", time.Since(start).Seconds(), false) - return nil, err + return nil, fmt.Errorf("failed to apply storyrun '%s' in namespace '%s': %w", name, c.namespace, err) } metrics.RecordK8sOperation(ctx, "TriggerStory", time.Since(start).Seconds(), true) - return storyRun, nil -} - -func storyTriggerRequestMatches(existing, desired *runsv1alpha1.StoryTrigger) bool { - if existing == nil || desired == nil { - return false - } - if existing.Spec.StoryRef.Name != desired.Spec.StoryRef.Name { - return false - } - if strings.TrimSpace(existing.Spec.StoryRef.Version) != strings.TrimSpace(desired.Spec.StoryRef.Version) { - return false - } - if !sameNamespace(existing.Spec.StoryRef.Namespace, desired.Spec.StoryRef.Namespace) { - return false - } - if !sameImpulseRef(existing.Spec.ImpulseRef, desired.Spec.ImpulseRef) { - return false - } - if !sameTriggerDeliveryIdentity(existing.Spec.DeliveryIdentity, desired.Spec.DeliveryIdentity) { - return false - } - return bytes.Equal(normalizeInputs(existing.Spec.Inputs), normalizeInputs(desired.Spec.Inputs)) -} - -func sameImpulseRef(a, b *refs.ImpulseReference) bool { - if a == nil && b == nil { - return true - } - if a == nil || b == nil { - return false - } - if a.Name != b.Name || strings.TrimSpace(a.Version) != strings.TrimSpace(b.Version) { - return false + // Return the applied object (contains only the fields we set). Fetch to return server state. + created := &runsv1alpha1.StoryRun{} + if err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: c.namespace}, created); err != nil { + // Best-effort: still return our object if GET fails post-apply + return applyObj, nil } - return sameNamespace(a.Namespace, b.Namespace) -} - -func sameTriggerDeliveryIdentity(a, b runsv1alpha1.TriggerDeliveryIdentity) bool { - switch { - case a.Mode == nil && b.Mode != nil, a.Mode != nil && b.Mode == nil: - return false - case a.Mode != nil && b.Mode != nil && *a.Mode != *b.Mode: - return false - } - return strings.TrimSpace(a.Key) == strings.TrimSpace(b.Key) && - strings.TrimSpace(a.InputHash) == strings.TrimSpace(b.InputHash) && - strings.TrimSpace(a.SubmissionID) == strings.TrimSpace(b.SubmissionID) -} - -func (c *Client) waitForStoryTriggerResolution(ctx context.Context, trigger *runsv1alpha1.StoryTrigger) (*runsv1alpha1.StoryRun, error) { //nolint:lll - if trigger == nil { - return nil, fmt.Errorf("story trigger request must not be nil") - } - - waitCtx := ctx - cancel := func() {} - if _, hasDeadline := ctx.Deadline(); !hasDeadline { - waitCtx, cancel = context.WithTimeout(ctx, getOperationTimeout()) - } - defer cancel() - - key := types.NamespacedName{Name: trigger.Name, Namespace: trigger.Namespace} - ticker := time.NewTicker(storyTriggerPollInterval) - defer ticker.Stop() - - current := trigger.DeepCopy() - for { - switch current.Status.Decision { - case runsv1alpha1.StoryTriggerDecisionCreated, runsv1alpha1.StoryTriggerDecisionReused: - return c.getResolvedStoryRun(waitCtx, current) - case runsv1alpha1.StoryTriggerDecisionRejected: - message := strings.TrimSpace(current.Status.Message) - if message == "" { - message = fmt.Sprintf("storytrigger %s/%s was rejected", current.Namespace, current.Name) - } - return nil, fmt.Errorf("%s", message) - } - - select { - case <-waitCtx.Done(): - if ctx.Err() != nil { - return nil, ctx.Err() - } - return nil, fmt.Errorf("%w: storytrigger '%s/%s' is still pending", sdkerrors.ErrRetryable, trigger.Namespace, trigger.Name) //nolint:lll - case <-ticker.C: - } - - getCtx, cancelGet := context.WithTimeout(waitCtx, getOperationTimeout()) - fresh := &runsv1alpha1.StoryTrigger{} - err := c.Get(getCtx, key, fresh) - cancelGet() - if err != nil { - if waitCtx.Err() != nil { - if ctx.Err() != nil { - return nil, ctx.Err() - } - return nil, fmt.Errorf("%w: storytrigger '%s/%s' is still pending", sdkerrors.ErrRetryable, trigger.Namespace, trigger.Name) //nolint:lll - } - return nil, wrapK8sError(err, "failed to get storytrigger '%s' in namespace '%s'", trigger.Name, trigger.Namespace) - } - current = fresh - } -} - -func (c *Client) getResolvedStoryRun(ctx context.Context, trigger *runsv1alpha1.StoryTrigger) (*runsv1alpha1.StoryRun, error) { //nolint:lll - ref := trigger.Status.StoryRunRef - if ref == nil { - return nil, fmt.Errorf("%w: storytrigger '%s/%s' resolved without a storyrun reference", sdkerrors.ErrRetryable, trigger.Namespace, trigger.Name) //nolint:lll - } - - runNamespace := trigger.Namespace - if ref.Namespace != nil && strings.TrimSpace(*ref.Namespace) != "" { - runNamespace = strings.TrimSpace(*ref.Namespace) - } - - getCtx, cancel := context.WithTimeout(ctx, getOperationTimeout()) - defer cancel() - - run := &runsv1alpha1.StoryRun{} - if err := c.Get(getCtx, types.NamespacedName{Name: ref.Name, Namespace: runNamespace}, run); err != nil { - return nil, wrapK8sError(err, "failed to get resolved storyrun '%s' in namespace '%s'", ref.Name, runNamespace) - } - return run, nil -} - -func storyInputSchemaID(namespace, name string) string { - name = strings.TrimSpace(name) - if name == "" { - return "" - } - namespace = strings.TrimSpace(namespace) - if namespace == "" { - return fmt.Sprintf("bubu://story/%s/inputs", name) - } - return fmt.Sprintf("bubu://story/%s/%s/inputs", namespace, name) -} - -func (c *Client) resolveStoryVersion(ctx context.Context, namespace, name string) string { - name = strings.TrimSpace(name) - if name == "" { - return "" - } - if namespace == "" { - namespace = c.namespace - } - var story bubuv1alpha1.Story - if err := c.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, &story); err != nil { - return "" - } - return strings.TrimSpace(story.Spec.Version) -} - -// StopStoryRun requests graceful cancellation for the specified StoryRun. If -// namespace is empty, the client's namespace is used. Callers need RBAC for -// `storyruns` `get` and `storyruns` `patch`. -// Returns sdkerrors.ErrNotFound when the StoryRun does not exist. -func (c *Client) StopStoryRun(ctx context.Context, storyRunName, namespace string) error { //nolint:gocyclo - if ctx == nil { - return fmt.Errorf("context must not be nil") - } - targetNamespace := strings.TrimSpace(namespace) - if targetNamespace == "" { - targetNamespace = c.namespace - } - if targetNamespace == "" { - return fmt.Errorf("failed to resolve namespace for storyrun %q", storyRunName) - } - - start := time.Now() - key := types.NamespacedName{Name: storyRunName, Namespace: targetNamespace} - - updateErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - apiCtx, cancel := context.WithTimeout(ctx, getOperationTimeout()) - defer cancel() - - current := &runsv1alpha1.StoryRun{} - if err := c.Get(apiCtx, key, current); err != nil { - return err - } - - // If already in a terminal phase, respect it and don't overwrite. - // This prevents race conditions where the controller sets Succeeded/Failed - // between our read and write. - if current.Status.Phase.IsTerminal() { - return nil - } - - // Request graceful cancel through spec instead of force-finishing status. - if current.Spec.CancelRequested != nil && *current.Spec.CancelRequested { - return nil - } - - before := current.DeepCopy() - cancelRequested := true - current.Spec.CancelRequested = &cancelRequested - if err := c.Patch(apiCtx, current, client.MergeFrom(before)); err != nil { - return err - } - return nil - }) - if updateErr != nil { - if apierrors.IsNotFound(updateErr) { - metrics.RecordK8sOperation(ctx, "StopStoryRun", time.Since(start).Seconds(), false) - return fmt.Errorf("storyrun %s/%s not found: %w", targetNamespace, storyRunName, sdkerrors.ErrNotFound) - } - metrics.RecordK8sOperation(ctx, "StopStoryRun", time.Since(start).Seconds(), false) - return wrapK8sError(updateErr, "failed to update storyrun '%s' in namespace '%s'", storyRunName, targetNamespace) - } - - metrics.RecordK8sOperation(ctx, "StopStoryRun", time.Since(start).Seconds(), true) - return nil + return created, nil } // offloadInputsIfNecessary checks if the inputs exceed the max inline size and, if so, // offloads them to the configured storage backend, returning a storage reference. // If inputs are small enough, it returns them unchanged. -func (c *Client) offloadInputsIfNecessary( - ctx context.Context, - storyNamespace string, - storyName string, - token string, - inputs map[string]any, -) (any, error) { +func (c *Client) offloadInputsIfNecessary(ctx context.Context, storyName string, inputs map[string]any) (any, error) { // A StorageManager is needed to check size limits and perform offloading. - sm, err := storage.SharedManager(ctx) + sm, err := storage.NewManager(ctx) if err != nil { return nil, fmt.Errorf("failed to create storage manager for input offloading: %w", err) } - if sm == nil { - // No storage backend configured; return inputs as-is (no offloading possible). - return inputs, nil - } // Dehydrate will check the size and offload if necessary. // We use the storyName as a stable prefix for the storage path. The StoryRun's // name is generated and not known at this point, so we can't use it here. // The storyrun-controller will later pass this reference to the first step, // which will hydrate it using its own stepRunID in the path. - var inputFingerprint string - if token != "" { - inputFingerprint = computeInputFingerprint(inputs) - } - storageKey := deriveStorageInputKey(storyNamespace, storyName, token, inputFingerprint) - namespacedKey := storage.NamespacedKey(c.namespace, storageKey) - dehydratedInputs, err := sm.DehydrateInputs(ctx, inputs, namespacedKey) + // We generate a UUID to ensure the path is unique for this trigger. + uniqueID := uuid.New().String() + dehydratedInputs, err := sm.DehydrateInputs(ctx, inputs, uniqueID) if err != nil { return nil, fmt.Errorf("failed to dehydrate inputs for story '%s': %w", storyName, err) } @@ -1118,72 +452,40 @@ func (c *Client) offloadInputsIfNecessary( // PatchStepRunStatus updates the StepRun status with retry-on-conflict logic. // This prevents lost updates when the controller patches status simultaneously. -// Uses exponential backoff with jitter to prevent "thundering herd" problems. -// Callers need RBAC for `stepruns` `get` and `stepruns/status` `patch`. -func (c *Client) PatchStepRunStatus( //nolint:gocyclo +func (c *Client) PatchStepRunStatus( ctx context.Context, stepRunName string, patchData runsv1alpha1.StepRunStatus, ) error { - if ctx == nil { - return fmt.Errorf("context must not be nil") - } // Set timeout for this operation ctx, cancel := context.WithTimeout(ctx, getOperationTimeout()) defer cancel() start := time.Now() - maxRetryCount := getMaxPatchRetries() + maxRetries := getMaxPatchRetries() backoff := 100 * time.Millisecond var lastErr error - for attempt := 0; attempt <= maxRetryCount; attempt++ { + for attempt := 0; attempt <= maxRetries; attempt++ { stepRun := &runsv1alpha1.StepRun{} if err := c.Get(ctx, types.NamespacedName{Name: stepRunName, Namespace: c.namespace}, stepRun); err != nil { metrics.RecordK8sOperation(ctx, "PatchStepRunStatus", time.Since(start).Seconds(), false) - wrappedErr := wrapK8sError(err, - "failed to get StepRun '%s' in namespace '%s' for status patch", - stepRunName, c.namespace, + return fmt.Errorf("failed to get StepRun '%s' in namespace '%s' for status patch: %w", + stepRunName, c.namespace, err, ) - if attempt < maxRetryCount && errors.Is(classifyK8sError(err), sdkerrors.ErrRetryable) { - lastErr = wrappedErr - jitteredBackoff := jitterDuration(backoff) - select { - case <-ctx.Done(): - return fmt.Errorf("status patch retry aborted due to context cancellation: %w", ctx.Err()) - case <-time.After(jitteredBackoff): - } - backoff *= 2 - continue - } - return wrappedErr - } - - // Zombie pod fencing: reject patches from stale pods when another pod is already recorded. - if podName := strings.TrimSpace(os.Getenv(contracts.PodNameEnv)); podName != "" { - if existing := strings.TrimSpace(stepRun.Status.PodName); existing != "" && existing != podName { - return fmt.Errorf("zombie pod fencing: current pod %q does not match existing %q; skipping patch", - podName, existing) - } } // Merge field-wise to avoid clobbering controller-managed fields merged, err := mergeStepRunStatus(&stepRun.Status, &patchData) if err != nil { + // Validation error indicates stale read or invalid patch + // Log and retry on next attempt metrics.RecordK8sOperation(ctx, "PatchStepRunStatus", time.Since(start).Seconds(), false) - // Invalid transitions are permanent errors — retrying the same patch against - // the same phase will never succeed. Return immediately without retrying. - if errors.Is(err, sdkerrors.ErrInvalidTransition) { - return err - } - // Other merge errors indicate stale read or transient conflict; retry. - if attempt < maxRetryCount { + if attempt < maxRetries { lastErr = err - // Use context-aware sleep with jitter to respect cancellation during backoff - // and prevent synchronized retries from multiple SDK clients. - jitteredBackoff := jitterDuration(backoff) + // Use context-aware sleep to respect cancellation during backoff select { case <-ctx.Done(): return fmt.Errorf("status patch retry aborted due to context cancellation: %w", ctx.Err()) - case <-time.After(jitteredBackoff): + case <-time.After(backoff): // Continue to next attempt } backoff *= 2 @@ -1191,115 +493,40 @@ func (c *Client) PatchStepRunStatus( //nolint:gocyclo } return fmt.Errorf("failed to merge StepRun status: %w", err) } - before := stepRun.DeepCopy() - stepRun.Status = merged - if err := c.Status().Patch(ctx, stepRun, client.MergeFrom(before)); err != nil { + // Server-Side Apply on status with stable FieldManager + applyObj := &runsv1alpha1.StepRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: stepRun.Name, + Namespace: stepRun.Namespace, + }, + Status: merged, + } + applyObj.SetGroupVersionKind(runsv1alpha1.GroupVersion.WithKind("StepRun")) + force := true + if err := c.Status().Patch(ctx, applyObj, client.Apply, &client.SubResourcePatchOptions{ + PatchOptions: client.PatchOptions{ + FieldManager: apply.FieldManager, + Force: &force, + }, + }); err != nil { metrics.RecordK8sOperation(ctx, "PatchStepRunStatus", time.Since(start).Seconds(), false) - if attempt < maxRetryCount { + if attempt < maxRetries { lastErr = err - // Apply jitter to prevent synchronized retries across SDK instances. - jitteredBackoff := jitterDuration(backoff) select { case <-ctx.Done(): return fmt.Errorf("status patch retry aborted due to context cancellation: %w", ctx.Err()) - case <-time.After(jitteredBackoff): + case <-time.After(backoff): } backoff *= 2 continue } - return wrapK8sError(err, - "failed to apply StepRun '%s' status in namespace '%s'", - stepRunName, c.namespace, - ) + return fmt.Errorf("failed to apply StepRun '%s' status in namespace '%s': %w", + stepRunName, c.namespace, err) } metrics.RecordK8sOperation(ctx, "PatchStepRunStatus", time.Since(start).Seconds(), true) return nil } - return wrapK8sError(lastErr, "failed to patch StepRun '%s' status after %d retries", stepRunName, maxRetryCount) -} - -// jitterDuration adds random jitter (±25%) to a duration to prevent synchronized -// retries and "thundering herd" effects when multiple SDK clients retry simultaneously. -func jitterDuration(d time.Duration) time.Duration { - if d <= 0 { - return d - } - // Add ±25% jitter: multiply by (0.75 + random(0, 0.5)) - jitterFactor := 0.75 + rand.Float64()*0.5 - return time.Duration(float64(d) * jitterFactor) -} - -func classifyK8sError(err error) error { - if err == nil { - return nil - } - - switch { - case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded), - apierrors.IsTimeout(err), apierrors.IsServerTimeout(err), apierrors.IsTooManyRequests(err), - isRetryableK8sTransportError(err): - return fmt.Errorf("%w: %v", sdkerrors.ErrRetryable, err) - case apierrors.IsConflict(err): - return fmt.Errorf("%w: %v", sdkerrors.ErrConflict, err) - case apierrors.IsNotFound(err): - return fmt.Errorf("%w: %v", sdkerrors.ErrNotFound, err) - default: - return err - } -} - -func isRetryableK8sTransportError(err error) bool { - if err == nil { - return false - } - - var urlErr *url.Error - if errors.As(err, &urlErr) { - if urlErr.Timeout() { - return true - } - inner := urlErr.Err - if inner != nil && inner != err && isRetryableK8sTransportError(inner) { - return true - } - } - - var timeoutErr interface{ Timeout() bool } - if errors.As(err, &timeoutErr) && timeoutErr.Timeout() { - return true - } - - var temporaryErr interface{ Temporary() bool } - if errors.As(err, &temporaryErr) && temporaryErr.Temporary() { - return true - } - - for _, target := range []error{ - syscall.ECONNRESET, - syscall.ECONNREFUSED, - syscall.ETIMEDOUT, - syscall.EHOSTUNREACH, - syscall.ENETUNREACH, - syscall.EPIPE, - } { - if errors.Is(err, target) { - return true - } - } - - return false -} - -func wrapK8sError(err error, format string, args ...any) error { - if err == nil { - return nil - } - msg := fmt.Sprintf(format, args...) - classified := classifyK8sError(err) - if classified != err { - return fmt.Errorf("%s: %w", msg, classified) - } - return fmt.Errorf("%s: %w", msg, err) + return fmt.Errorf("failed to patch StepRun '%s' status after %d retries: %w", stepRunName, maxRetries, lastErr) } // ResolvePodNamespace exposes the environment-based namespace resolution used by the SDK. @@ -1312,83 +539,24 @@ func ResolvePodNamespace() string { // It checks a series of BUBU_* environment variables in order of precedence, // which are set by the bobrapet controller for different execution contexts // (Stories, Impulses, StepRuns). It also checks the standard POD_NAMESPACE -// env var set by Kubernetes Downward API. When none are set, it returns an -// empty string so callers can fail closed on missing runtime namespace wiring. +// env var set by Kubernetes Downward API, and falls back to "default" for +// local development. func getPodNamespace() string { - if ns, ok := os.LookupEnv(contracts.TargetStoryNamespaceEnv); ok && ns != "" { + if ns, ok := os.LookupEnv("BUBU_TARGET_STORY_NAMESPACE"); ok && ns != "" { return ns } - if ns, ok := os.LookupEnv(contracts.ImpulseNamespaceEnv); ok && ns != "" { + if ns, ok := os.LookupEnv("BUBU_IMPULSE_NAMESPACE"); ok && ns != "" { return ns } - if ns, ok := os.LookupEnv(contracts.StepRunNamespaceEnv); ok && ns != "" { + if ns, ok := os.LookupEnv("BUBU_STEPRUN_NAMESPACE"); ok && ns != "" { return ns } - if ns, ok := os.LookupEnv(contracts.PodNamespaceEnv); ok && ns != "" { + if ns, ok := os.LookupEnv("BUBU_POD_NAMESPACE"); ok && ns != "" { return ns } - return "" -} - -func resolveImpulseRefFromEnv() *refs.ImpulseReference { - name := strings.TrimSpace(os.Getenv(contracts.ImpulseNameEnv)) - if name == "" { - return nil - } - ref := &refs.ImpulseReference{ - ObjectReference: refs.ObjectReference{Name: name}, - } - if ns := strings.TrimSpace(os.Getenv(contracts.ImpulseNamespaceEnv)); ns != "" { - ref.Namespace = new(ns) - } - return ref -} - -func (c *Client) recordTriggerThrottle(ctx context.Context) { - if c == nil { - return - } - ref := resolveImpulseRefFromEnv() - if ref == nil || ref.Name == "" { - return - } - ns := "" - if ref.Namespace != nil { - ns = *ref.Namespace - } - now := time.Now().UTC() - _ = c.UpdateImpulseTriggerStats(ctx, ref.Name, ns, TriggerStatsDelta{ - ThrottledTriggers: 1, - LastThrottled: &now, - }) -} - -func parentMetadataFromEnv() (map[string]string, map[string]string) { - labels := map[string]string{} - annotations := map[string]string{} - - parentStoryRun := strings.TrimSpace(os.Getenv(contracts.StoryRunIDEnv)) - parentStepRun := strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) - parentStep := strings.TrimSpace(os.Getenv(contracts.StepNameEnv)) - - if parentStoryRun != "" { - labels[contracts.ParentStoryRunLabel] = coreidentity.SafeLabelValue(parentStoryRun) - annotations[contracts.ParentStoryRunLabel] = parentStoryRun - } - if parentStep != "" { - labels[contracts.ParentStepLabel] = coreidentity.SafeLabelValue(parentStep) - annotations[contracts.ParentStepLabel] = parentStep - } - if parentStepRun != "" { - labels[contracts.StepRunLabelKey] = coreidentity.SafeLabelValue(parentStepRun) - annotations["bubustack.io/parent-steprun"] = parentStepRun - } - - if len(labels) == 0 { - labels = nil - } - if len(annotations) == 0 { - annotations = nil + if ns, ok := os.LookupEnv("POD_NAMESPACE"); ok { + return ns } - return labels, annotations + // Fallback for local testing. + return "default" } diff --git a/k8s/client_mock.go b/k8s/client_mock.go index 4ce3b6e..2798bb0 100644 --- a/k8s/client_mock.go +++ b/k8s/client_mock.go @@ -1,24 +1,7 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package k8s import ( "context" - "fmt" "github.com/stretchr/testify/mock" @@ -38,25 +21,15 @@ func (m *MockClient) GetNamespace() string { // TriggerStory mocks the TriggerStory method for testing. func (m *MockClient) TriggerStory( - ctx context.Context, storyName string, storyNamespace string, inputs map[string]any, + ctx context.Context, storyName string, inputs map[string]any, ) (*runsv1alpha1.StoryRun, error) { - args := m.Called(ctx, storyName, storyNamespace, inputs) + args := m.Called(ctx, storyName, inputs) if sr := args.Get(0); sr != nil { - typed, ok := sr.(*runsv1alpha1.StoryRun) - if !ok { - return nil, fmt.Errorf("mock TriggerStory expected *runsv1alpha1.StoryRun, got %T", sr) - } - return typed, args.Error(1) + return sr.(*runsv1alpha1.StoryRun), args.Error(1) } return nil, args.Error(1) } -// StopStoryRun mocks the StopStoryRun method for testing. -func (m *MockClient) StopStoryRun(ctx context.Context, storyRunName, namespace string) error { - args := m.Called(ctx, storyRunName, namespace) - return args.Error(0) -} - // PatchStepRunStatus mocks the PatchStepRunStatus method for testing. func (m *MockClient) PatchStepRunStatus( ctx context.Context, stepRunName string, patchData runsv1alpha1.StepRunStatus, diff --git a/k8s/client_mock_test.go b/k8s/client_mock_test.go deleted file mode 100644 index 2e23b18..0000000 --- a/k8s/client_mock_test.go +++ /dev/null @@ -1,40 +0,0 @@ -package k8s - -import ( - "context" - "strings" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" -) - -func TestMockClientTriggerStoryReturnsTypedStoryRun(t *testing.T) { - m := &MockClient{} - expected := &runsv1alpha1.StoryRun{} - inputs := map[string]any{"key": "value"} - m.On("TriggerStory", context.Background(), "story", "ns", inputs).Return(expected, nil) - - got, err := m.TriggerStory(context.Background(), "story", "ns", inputs) - if err != nil { - t.Fatalf("TriggerStory() error = %v", err) - } - if got != expected { - t.Fatalf("TriggerStory() returned unexpected StoryRun: got %+v want %+v", got, expected) - } -} - -func TestMockClientTriggerStoryRejectsWrongReturnType(t *testing.T) { - m := &MockClient{} - m.On("TriggerStory", context.Background(), "story", "ns", map[string]any(nil)).Return("not-a-storyrun", nil) - - got, err := m.TriggerStory(context.Background(), "story", "ns", nil) - if got != nil { - t.Fatalf("expected nil StoryRun on wrong type, got %+v", got) - } - if err == nil { - t.Fatal("expected descriptive error for wrong mock return type") - } - if !strings.Contains(err.Error(), "expected *runsv1alpha1.StoryRun") { - t.Fatalf("unexpected error: %v", err) - } -} diff --git a/k8s/client_test.go b/k8s/client_test.go index bd0167b..e2d26c5 100644 --- a/k8s/client_test.go +++ b/k8s/client_test.go @@ -1,327 +1,18 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package k8s import ( - "bytes" "context" - "crypto/sha256" - "encoding/hex" "encoding/json" - "errors" - "fmt" "os" - "path/filepath" "reflect" - "strings" "testing" - "time" runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - bobrapetv1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/refs" - runsidentity "github.com/bubustack/bobrapet/pkg/runs/identity" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" - "github.com/bubustack/core/contracts" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - k8sruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" - "k8s.io/utils/ptr" - ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -type transientGetClient struct { - ctrlclient.Client - getErrors []error - getCalls int -} - -func (c *transientGetClient) Get( - ctx context.Context, - key ctrlclient.ObjectKey, - obj ctrlclient.Object, - opts ...ctrlclient.GetOption, -) error { - c.getCalls++ - if idx := c.getCalls - 1; idx < len(c.getErrors) && c.getErrors[idx] != nil { - return c.getErrors[idx] - } - return c.Client.Get(ctx, key, obj, opts...) -} - -type transientStoryTriggerGetClient struct { - ctrlclient.Client - triggerName string - failuresLeft int - failuresSeen int -} - -func (c *transientStoryTriggerGetClient) Get( - ctx context.Context, - key ctrlclient.ObjectKey, - obj ctrlclient.Object, - opts ...ctrlclient.GetOption, -) error { - if _, ok := obj.(*runsv1alpha1.StoryTrigger); ok && key.Name == c.triggerName && c.failuresLeft > 0 { - c.failuresLeft-- - c.failuresSeen++ - return apierrors.NewTimeoutError("simulated transient get timeout", 1) - } - return c.Client.Get(ctx, key, obj, opts...) -} - -type transientCreateClient struct { - ctrlclient.Client - createErrors []error - createCalls int -} - -func (c *transientCreateClient) Create( - ctx context.Context, - obj ctrlclient.Object, - opts ...ctrlclient.CreateOption, -) error { - c.createCalls++ - if idx := c.createCalls - 1; idx < len(c.createErrors) && c.createErrors[idx] != nil { - return c.createErrors[idx] - } - return c.Client.Create(ctx, obj, opts...) -} - -type storyTriggerResolvingClient struct { - ctrlclient.Client -} - -func newStoryTriggerResolvingFakeClient(objects ...ctrlclient.Object) ctrlclient.Client { //nolint:unparam - base := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource( - &runsv1alpha1.StepRun{}, - &runsv1alpha1.StoryRun{}, - &runsv1alpha1.StoryTrigger{}, - &bobrapetv1alpha1.Impulse{}, - ). - WithObjects(objects...). - Build() - return &storyTriggerResolvingClient{Client: base} -} - -func (c *storyTriggerResolvingClient) Create( - ctx context.Context, - obj ctrlclient.Object, - opts ...ctrlclient.CreateOption, -) error { - if err := c.Client.Create(ctx, obj, opts...); err != nil { - return err - } - trigger, ok := obj.(*runsv1alpha1.StoryTrigger) - if !ok { - return nil - } - return c.resolveStoryTrigger(ctx, ctrlclient.ObjectKeyFromObject(trigger)) -} - -func (c *storyTriggerResolvingClient) resolveStoryTrigger(ctx context.Context, key ctrlclient.ObjectKey) error { - trigger := &runsv1alpha1.StoryTrigger{} - if err := c.Client.Get(ctx, key, trigger); err != nil { //nolint:staticcheck - return err - } - if trigger.Status.Decision != "" { - return nil - } - - inputHash, err := runsidentity.ComputeTriggerInputHashFromRawExtension(trigger.Spec.Inputs) - if err != nil { - return err - } - - storyRun := desiredStoryRunForTriggerTest(trigger, inputHash) - decision := runsv1alpha1.StoryTriggerDecisionCreated - reason := "StoryRunCreated" - message := fmt.Sprintf("created StoryRun %s/%s", storyRun.Namespace, storyRun.Name) - - if err := c.Client.Create(ctx, storyRun); err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - - existing := &runsv1alpha1.StoryRun{} - //nolint:staticcheck,lll - if getErr := c.Client.Get(ctx, ctrlclient.ObjectKeyFromObject(storyRun), existing); getErr != nil { - return getErr - } - if !storyRunMatchesTriggerForTest(existing, trigger, inputHash) { - decision = runsv1alpha1.StoryTriggerDecisionRejected - reason = "StoryRunConflict" - message = fmt.Sprintf("existing StoryRun %s/%s does not match StoryTrigger storyRef and inputs", existing.Namespace, existing.Name) //nolint:lll - storyRun = nil - } else { - decision = runsv1alpha1.StoryTriggerDecisionReused - reason = "StoryRunReused" - message = fmt.Sprintf("reused existing StoryRun %s/%s", existing.Namespace, existing.Name) - storyRun = existing - } - } - - now := metav1.Now() - trigger.Status.ObservedGeneration = trigger.Generation - if trigger.Status.AcceptedAt == nil { - trigger.Status.AcceptedAt = &now - } - trigger.Status.CompletedAt = &now - trigger.Status.Decision = decision - trigger.Status.Reason = reason - trigger.Status.Message = message - trigger.Status.StoryRunRef = storyRunRefForTest(storyRun) - return c.Client.Status().Update(ctx, trigger) -} - -func desiredStoryRunForTriggerTest(trigger *runsv1alpha1.StoryTrigger, inputHash string) *runsv1alpha1.StoryRun { - identity := runsidentity.StoryTriggerIdentity( - strings.TrimSpace(trigger.Spec.DeliveryIdentity.Key), - strings.TrimSpace(trigger.Spec.DeliveryIdentity.SubmissionID), - ) - storyNamespace := trigger.Spec.StoryRef.ToNamespacedName(trigger).Namespace - - annotations := map[string]string{ - runsidentity.StoryRunTriggerRequestNameAnnotation: trigger.Name, - } - if uid := string(trigger.GetUID()); uid != "" { - annotations[runsidentity.StoryRunTriggerRequestUIDAnnotation] = uid - } - if key := strings.TrimSpace(trigger.Spec.DeliveryIdentity.Key); key != "" { - annotations[runsidentity.StoryRunTriggerTokenAnnotation] = key - annotations[runsidentity.StoryRunTriggerInputHashAnnotation] = inputHash - } - - var impulseRef *refs.ImpulseReference - if trigger.Spec.ImpulseRef != nil { - impulseRef = trigger.Spec.ImpulseRef.DeepCopy() - } - - var inputs *k8sruntime.RawExtension - if trigger.Spec.Inputs != nil { - inputs = trigger.Spec.Inputs.DeepCopy() - } - - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: runsidentity.DeriveStoryRunName(storyNamespace, trigger.Spec.StoryRef.Name, identity), - Namespace: trigger.Namespace, - Annotations: annotations, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: *trigger.Spec.StoryRef.DeepCopy(), - ImpulseRef: impulseRef, - Inputs: inputs, - }, - } -} - -func storyRunMatchesTriggerForTest(existing *runsv1alpha1.StoryRun, trigger *runsv1alpha1.StoryTrigger, triggerHash string) bool { //nolint:lll - if existing == nil || trigger == nil { - return false - } - if existing.Spec.StoryRef.ToNamespacedName(existing) != trigger.Spec.StoryRef.ToNamespacedName(trigger) { - return false - } - if strings.TrimSpace(existing.Spec.StoryRef.Version) != strings.TrimSpace(trigger.Spec.StoryRef.Version) { - return false - } - existingHash, err := runsidentity.ComputeTriggerInputHashFromRawExtension(existing.Spec.Inputs) - if err != nil { - return false - } - return existingHash == triggerHash -} - -func storyRunRefForTest(storyRun *runsv1alpha1.StoryRun) *refs.StoryRunReference { - if storyRun == nil { - return nil - } - ref := &refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{ - Name: storyRun.Name, - }, - } - if storyRun.Namespace != "" { - namespace := storyRun.Namespace - ref.Namespace = &namespace - } - return ref -} - -func singleStoryTrigger(t *testing.T, c ctrlclient.Client, namespace string) *runsv1alpha1.StoryTrigger { - t.Helper() - list := &runsv1alpha1.StoryTriggerList{} - if err := c.List(context.Background(), list, ctrlclient.InNamespace(namespace)); err != nil { - t.Fatalf("failed to list StoryTriggers: %v", err) - } - if len(list.Items) != 1 { - t.Fatalf("expected exactly 1 StoryTrigger, got %d", len(list.Items)) - } - return list.Items[0].DeepCopy() -} - -type timeoutGetClient struct { - ctrlclient.Client -} - -func (c *timeoutGetClient) Get( - ctx context.Context, - key ctrlclient.ObjectKey, - obj ctrlclient.Object, - opts ...ctrlclient.GetOption, -) error { - return apierrors.NewTimeoutError("simulated timeout", 1) -} - -type transientStatusPatchClient struct { - ctrlclient.Client - statusWriter ctrlclient.SubResourceWriter -} - -func (c *transientStatusPatchClient) Status() ctrlclient.SubResourceWriter { - return c.statusWriter -} - -type transientStatusWriter struct { - ctrlclient.SubResourceWriter - patchErrors []error - patchCalls int -} - -func (w *transientStatusWriter) Patch( - ctx context.Context, - obj ctrlclient.Object, - patch ctrlclient.Patch, - opts ...ctrlclient.SubResourcePatchOption, -) error { - w.patchCalls++ - if idx := w.patchCalls - 1; idx < len(w.patchErrors) && w.patchErrors[idx] != nil { - return w.patchErrors[idx] - } - return w.SubResourceWriter.Patch(ctx, obj, patch, opts...) -} - func TestGetPodNamespace(t *testing.T) { testCases := []struct { name string @@ -329,49 +20,60 @@ func TestGetPodNamespace(t *testing.T) { expected string }{ { - name: contracts.TargetStoryNamespaceEnv + " should have the highest precedence", + name: "BUBU_TARGET_STORY_NAMESPACE should have the highest precedence", envVars: map[string]string{ - contracts.TargetStoryNamespaceEnv: "story-ns", - contracts.ImpulseNamespaceEnv: "impulse-ns", - contracts.StepRunNamespaceEnv: "steprun-ns", - contracts.PodNamespaceEnv: "pod-ns", + "BUBU_TARGET_STORY_NAMESPACE": "story-ns", + "BUBU_IMPULSE_NAMESPACE": "impulse-ns", + "BUBU_STEPRUN_NAMESPACE": "steprun-ns", + "BUBU_POD_NAMESPACE": "pod-ns", + "POD_NAMESPACE": "k8s-pod-ns", }, expected: "story-ns", }, { - name: contracts.ImpulseNamespaceEnv + " should have second precedence", + name: "BUBU_IMPULSE_NAMESPACE should have second precedence", envVars: map[string]string{ - contracts.ImpulseNamespaceEnv: "impulse-ns", - contracts.StepRunNamespaceEnv: "steprun-ns", - contracts.PodNamespaceEnv: "pod-ns", + "BUBU_IMPULSE_NAMESPACE": "impulse-ns", + "BUBU_STEPRUN_NAMESPACE": "steprun-ns", + "BUBU_POD_NAMESPACE": "pod-ns", + "POD_NAMESPACE": "k8s-pod-ns", }, expected: "impulse-ns", }, { - name: contracts.StepRunNamespaceEnv + " should have third precedence", + name: "BUBU_STEPRUN_NAMESPACE should have third precedence", envVars: map[string]string{ - contracts.StepRunNamespaceEnv: "steprun-ns", - contracts.PodNamespaceEnv: "pod-ns", + "BUBU_STEPRUN_NAMESPACE": "steprun-ns", + "BUBU_POD_NAMESPACE": "pod-ns", + "POD_NAMESPACE": "k8s-pod-ns", }, expected: "steprun-ns", }, { - name: contracts.PodNamespaceEnv + " should have fourth precedence", + name: "BUBU_POD_NAMESPACE should have fourth precedence", envVars: map[string]string{ - contracts.PodNamespaceEnv: "pod-ns", + "BUBU_POD_NAMESPACE": "pod-ns", + "POD_NAMESPACE": "k8s-pod-ns", }, expected: "pod-ns", }, { - name: "Should return empty string if no env vars are set", + name: "POD_NAMESPACE should have fifth precedence", + envVars: map[string]string{ + "POD_NAMESPACE": "k8s-pod-ns", + }, + expected: "k8s-pod-ns", + }, + { + name: "Should fallback to default if no env vars are set", envVars: map[string]string{}, - expected: "", + expected: "default", }, { name: "Should ignore empty env vars and use the next in precedence", envVars: map[string]string{ - contracts.TargetStoryNamespaceEnv: "", - contracts.ImpulseNamespaceEnv: "impulse-ns", + "BUBU_TARGET_STORY_NAMESPACE": "", + "BUBU_IMPULSE_NAMESPACE": "impulse-ns", }, expected: "impulse-ns", }, @@ -379,19 +81,26 @@ func TestGetPodNamespace(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - for _, key := range []string{ - contracts.TargetStoryNamespaceEnv, - contracts.ImpulseNamespaceEnv, - contracts.StepRunNamespaceEnv, - contracts.PodNamespaceEnv, - } { - t.Setenv(key, "") - } + // Clean up env vars before each test + os.Clearenv() for key, value := range tc.envVars { - t.Setenv(key, value) + err := os.Setenv(key, value) + if err != nil { + t.Fatal(err) + } } + // Defer cleanup to after the test + defer func() { + for key := range tc.envVars { + err := os.Unsetenv(key) + if err != nil { + t.Fatal(err) + } + } + }() + if got := getPodNamespace(); got != tc.expected { t.Errorf("getPodNamespace() = %v, want %v", got, tc.expected) } @@ -399,62 +108,6 @@ func TestGetPodNamespace(t *testing.T) { } } -func TestResolveImpulseRefFromEnv(t *testing.T) { - t.Run("missing env returns nil", func(t *testing.T) { - t.Setenv(contracts.ImpulseNameEnv, "") - t.Setenv(contracts.ImpulseNamespaceEnv, "") - if ref := resolveImpulseRefFromEnv(); ref != nil { - t.Fatalf("expected nil impulse ref, got %+v", ref) - } - }) - - t.Run("name only populates reference", func(t *testing.T) { - t.Setenv(contracts.ImpulseNameEnv, "impulse-a") - t.Setenv(contracts.ImpulseNamespaceEnv, "") - ref := resolveImpulseRefFromEnv() - if ref == nil { - t.Fatal("expected impulse ref, got nil") - } - if ref.Name != "impulse-a" { - t.Fatalf("expected impulse name impulse-a, got %q", ref.Name) - } - if ref.Namespace != nil { - t.Fatalf("expected nil namespace, got %q", *ref.Namespace) - } - }) - - t.Run("name and namespace populated", func(t *testing.T) { - t.Setenv(contracts.ImpulseNameEnv, "impulse-b") - t.Setenv(contracts.ImpulseNamespaceEnv, "impulse-ns") - ref := resolveImpulseRefFromEnv() - if ref == nil { - t.Fatal("expected impulse ref, got nil") - } - if ref.Name != "impulse-b" { - t.Fatalf("expected impulse name impulse-b, got %q", ref.Name) - } - if ref.Namespace == nil || *ref.Namespace != "impulse-ns" { - t.Fatalf("expected namespace impulse-ns, got %#v", ref.Namespace) - } - }) -} - -func TestWithTriggerToken_AllowsNilContextAndStoresToken(t *testing.T) { - ctx := WithTriggerToken(nil, "token-123") //nolint:staticcheck - if ctx == nil { - t.Fatal("expected context when attaching token to nil context") - } - if got := TriggerTokenFromContext(ctx); got != "token-123" { - t.Fatalf("TriggerTokenFromContext() = %q, want %q", got, "token-123") - } -} - -func TestWithTriggerToken_EmptyTokenPreservesNilContext(t *testing.T) { - if got := WithTriggerToken(nil, ""); got != nil { //nolint:staticcheck - t.Fatalf("expected nil context passthrough for empty token, got %#v", got) - } -} - func TestInit(t *testing.T) { // Test that the init function sets up the scheme properly // We can't directly test init(), but we can verify the scheme variable is not nil @@ -480,146 +133,8 @@ func TestClientStructure(t *testing.T) { } } -func TestDeriveStoryRunName(t *testing.T) { - tests := []struct { - name string - storyNamespace string - storyName string - token string - }{ - { - name: "simple alphanumeric token", - storyName: "workflow", - token: "abc123", - }, - { - name: "token with invalid characters gets sanitized", - storyName: "workflow", - token: "Order 42 / Completed", - }, - { - name: "token differing only by case still gets unique suffix", - storyName: "workflow", - token: "Order-42", - }, - { - name: "long token falls back to hash suffix", - storyName: "workflow", - token: strings.Repeat("a", 300), - }, - { - name: "token with only invalid characters falls back to hash", - storyName: "workflow", - token: "!!!", - }, - { - name: "long story name still respects metadata length", - storyName: strings.Repeat("b", 250), - token: "token", - }, - { - name: "cross-namespace tokens always include hash", - storyNamespace: "team-a", - storyName: "workflow", - token: "deploy-42", - }, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - got := deriveStoryRunName(tc.storyNamespace, tc.storyName, tc.token) - want := runsidentity.DeriveStoryRunName(tc.storyNamespace, tc.storyName, tc.token) - if got != want { - t.Fatalf("deriveStoryRunName() = %q, want %q", got, want) - } - if len(got) > metadataNameMaxLength { - t.Fatalf("deriveStoryRunName() length = %d, exceeds %d", len(got), metadataNameMaxLength) - } - }) - } -} - -func TestDeriveStorageInputKeyUsesInputHash(t *testing.T) { - storyNamespace := "team-a" - storyName := "workflow" - token := "token-123" - - inputsA := map[string]any{"key": "value"} - fingerprintA := computeInputFingerprint(inputsA) - keyOne := deriveStorageInputKey(storyNamespace, storyName, token, fingerprintA) - keyTwo := deriveStorageInputKey(storyNamespace, storyName, token, fingerprintA) - if keyOne != keyTwo { - t.Fatalf("expected identical keys for identical inputs, got %q vs %q", keyOne, keyTwo) - } - - inputsB := map[string]any{"key": "different"} - fingerprintB := computeInputFingerprint(inputsB) - if fingerprintB == fingerprintA { - t.Fatalf("fingerprints should differ for different payloads") - } - keyThree := deriveStorageInputKey(storyNamespace, storyName, token, fingerprintB) - if keyThree == keyOne { - t.Fatalf("storage key should change when input fingerprint changes (got %q)", keyThree) - } -} - -func TestComputeInputFingerprintDeterministic(t *testing.T) { - inputOne := map[string]any{ - "alpha": "a", - "beta": []any{map[string]any{"nested": 1}, "value"}, - } - inputTwo := map[string]any{ - "beta": []any{map[string]any{"nested": 1}, "value"}, - "alpha": "a", - } - - hashOne := computeInputFingerprint(inputOne) - hashTwo := computeInputFingerprint(inputTwo) - if hashOne == "" || hashTwo == "" { - t.Fatalf("expected non-empty fingerprints (hashOne=%q, hashTwo=%q)", hashOne, hashTwo) - } - if hashOne != hashTwo { - t.Fatalf("expected deterministic fingerprint, got %q vs %q", hashOne, hashTwo) - } -} - -func TestStoryTriggerRequestMatchesNamespace(t *testing.T) { - payload := []byte(`{"key":"value"}`) - buildTrigger := func(ns string) *runsv1alpha1.StoryTrigger { - storyRef := refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: "workflow"}, - } - if ns != "" { - storyRef.Namespace = &ns - } - return &runsv1alpha1.StoryTrigger{ - Spec: runsv1alpha1.StoryTriggerSpec{ - StoryRef: storyRef, - Inputs: &k8sruntime.RawExtension{Raw: payload}, - DeliveryIdentity: runsv1alpha1.TriggerDeliveryIdentity{ - SubmissionID: "submission", - }, - }, - } - } - - a := buildTrigger("team-a") - b := buildTrigger("team-b") - defaultNs := buildTrigger("") - - if !storyTriggerRequestMatches(a, buildTrigger("team-a")) { - t.Fatalf("expected StoryTriggers in the same namespace to match") - } - if storyTriggerRequestMatches(a, b) { - t.Fatalf("expected StoryTriggers in different namespaces not to match") - } - if storyTriggerRequestMatches(a, defaultNs) { - t.Fatalf("expected StoryTriggers with nil namespace pointers not to match explicit namespaces") - } -} - func TestTriggerStory(t *testing.T) { - fakeClient := newStoryTriggerResolvingFakeClient() + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(&runsv1alpha1.StepRun{}).Build() c := &Client{ Client: fakeClient, namespace: "test-ns", @@ -632,7 +147,7 @@ func TestTriggerStory(t *testing.T) { "param2": 123, } - storyRun, err := c.TriggerStory(ctx, storyName, "", inputs) + storyRun, err := c.TriggerStory(ctx, storyName, inputs) if err != nil { t.Fatalf("TriggerStory() failed: %v", err) } @@ -641,1436 +156,72 @@ func TestTriggerStory(t *testing.T) { t.Fatal("TriggerStory() returned nil storyRun") } - createdTrigger := singleStoryTrigger(t, fakeClient, "test-ns") - if createdTrigger.Namespace != "test-ns" { //nolint:goconst - t.Errorf("StoryTrigger namespace = %v, want 'test-ns'", createdTrigger.Namespace) - } - if createdTrigger.Spec.StoryRef.Name != storyName { - t.Errorf("StoryTrigger storyRef name = %v, want '%s'", createdTrigger.Spec.StoryRef.Name, storyName) - } - - var createdInputs map[string]any - if err := json.Unmarshal(createdTrigger.Spec.Inputs.Raw, &createdInputs); err != nil { - t.Fatalf("Failed to unmarshal inputs from created StoryTrigger: %v", err) - } - - expectedInputs := map[string]any{ - "param1": "value1", - "param2": float64(123), - } - if !reflect.DeepEqual(createdInputs, expectedInputs) { - t.Errorf("StoryTrigger inputs = %v, want %v", createdInputs, expectedInputs) - } - + // Verify the created StoryRun createdStoryRun := &runsv1alpha1.StoryRun{} err = fakeClient.Get(ctx, types.NamespacedName{Name: storyRun.Name, Namespace: storyRun.Namespace}, createdStoryRun) if err != nil { t.Fatalf("Failed to get created StoryRun: %v", err) } + + if createdStoryRun.Namespace != "test-ns" { + t.Errorf("StoryRun namespace = %v, want 'test-ns'", createdStoryRun.Namespace) + } if createdStoryRun.Spec.StoryRef.Name != storyName { t.Errorf("StoryRun storyRef name = %v, want '%s'", createdStoryRun.Spec.StoryRef.Name, storyName) } -} - -func TestTriggerStoryIncludesImpulseRef(t *testing.T) { - t.Setenv(contracts.ImpulseNameEnv, "live-impulse") - t.Setenv(contracts.ImpulseNamespaceEnv, "impulse-ns") - - fakeClient := newStoryTriggerResolvingFakeClient() - c := &Client{ - Client: fakeClient, - namespace: "stories", - } - ctx := context.Background() - storyRun, err := c.TriggerStory(ctx, "impulse-story", "", map[string]any{}) - if err != nil { - t.Fatalf("TriggerStory() failed: %v", err) - } - if storyRun == nil { - t.Fatal("TriggerStory() returned nil storyRun") + var createdInputs map[string]any + if err := json.Unmarshal(createdStoryRun.Spec.Inputs.Raw, &createdInputs); err != nil { + t.Fatalf("Failed to unmarshal inputs from created StoryRun: %v", err) } - created := singleStoryTrigger(t, fakeClient, "stories") - if created.Spec.ImpulseRef == nil { - t.Fatal("expected impulseRef populated on StoryTrigger") - } - if created.Spec.ImpulseRef.Name != "live-impulse" { - t.Fatalf("expected impulse name live-impulse, got %q", created.Spec.ImpulseRef.Name) + // JSON unmarshaling converts numbers to float64 + expectedInputs := map[string]any{ + "param1": "value1", + "param2": float64(123), } - if created.Spec.ImpulseRef.Namespace == nil || *created.Spec.ImpulseRef.Namespace != "impulse-ns" { - t.Fatalf("expected impulse namespace impulse-ns, got %#v", created.Spec.ImpulseRef.Namespace) + if !reflect.DeepEqual(createdInputs, expectedInputs) { + t.Errorf("StoryRun inputs = %v, want %v", createdInputs, expectedInputs) } } -func TestTriggerStoryDeterministicStorageKey(t *testing.T) { - t.Setenv(contracts.StorageProviderEnv, "file") - storageDir := t.TempDir() - t.Setenv(contracts.StoragePathEnv, storageDir) - t.Setenv(contracts.MaxInlineSizeEnv, "1") - - fakeClient := newStoryTriggerResolvingFakeClient() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - ctx := WithTriggerToken(context.Background(), "consistent-token") - storyName := "deterministic-story" - inputs := map[string]any{ - "payload": strings.Repeat("x", 2048), - } - - first, err := c.TriggerStory(ctx, storyName, "", inputs) - if err != nil { - t.Fatalf("first TriggerStory() call failed: %v", err) - } - second, err := c.TriggerStory(ctx, storyName, "", inputs) - if err != nil { - t.Fatalf("second TriggerStory() call failed: %v", err) - } - - ref1 := storageRefFromStoryRun(t, first) - ref2 := storageRefFromStoryRun(t, second) - - if ref1 != ref2 { - t.Fatalf("storage reference mismatch between idempotent triggers: %q != %q", ref1, ref2) +func TestPatchStepRunStatus(t *testing.T) { + stepRun := &runsv1alpha1.StepRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-step-run", + Namespace: "test-ns", + }, } - if _, err := os.Stat(filepath.Join(storageDir, ref1)); err != nil { - t.Fatalf("expected offloaded inputs at %s: %v", filepath.Join(storageDir, ref1), err) - } -} + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithStatusSubresource(&runsv1alpha1.StepRun{}). + WithObjects(stepRun).Build() -func TestTriggerStoryTokenMismatchErrors(t *testing.T) { - fakeClient := newStoryTriggerResolvingFakeClient() c := &Client{ Client: fakeClient, namespace: "test-ns", } - ctx := WithTriggerToken(context.Background(), "consistent-token") - storyName := "idempotent-story" - if _, err := c.TriggerStory(ctx, storyName, "", map[string]any{"value": "first"}); err != nil { - t.Fatalf("initial TriggerStory() call failed: %v", err) - } - - if _, err := c.TriggerStory(ctx, storyName, "", map[string]any{"value": "second"}); err == nil { - t.Fatal("expected error when reusing trigger token with different inputs") - } -} - -func TestTriggerStoryContextTokenOverridesEnv(t *testing.T) { - t.Setenv(contracts.TriggerTokenEnv, "env-token") - - fakeClient := newStoryTriggerResolvingFakeClient() - c := &Client{ - Client: fakeClient, - namespace: "token-ns", - } - - ctx := WithTriggerToken(context.Background(), "ctx-token") - storyName := "token-story" - sr, err := c.TriggerStory(ctx, storyName, "", map[string]any{}) - if err != nil { - t.Fatalf("TriggerStory() error = %v", err) - } - expected := deriveStoryRunName("token-ns", storyName, "ctx-token") - if sr == nil || sr.Name != expected { - t.Fatalf("expected StoryRun name %q, got %+v", expected, sr) - } - created := singleStoryTrigger(t, fakeClient, "token-ns") - inputBytes, _ := json.Marshal(map[string]any{}) - expectedHash, err := runsidentity.ComputeTriggerInputHash(inputBytes) - if err != nil { - t.Fatalf("failed to compute trigger input hash: %v", err) - } - if created.Spec.DeliveryIdentity.Mode == nil || *created.Spec.DeliveryIdentity.Mode != bobrapetv1alpha1.TriggerDedupeToken { //nolint:lll - t.Fatalf("expected token delivery identity, got %#v", created.Spec.DeliveryIdentity.Mode) - } - if got := created.Spec.DeliveryIdentity.Key; got != "ctx-token" { - t.Fatalf("expected delivery identity key ctx-token, got %q", got) - } - if got := created.Spec.DeliveryIdentity.SubmissionID; got != "ctx-token" { - t.Fatalf("expected submissionID ctx-token, got %q", got) - } - if got := created.Spec.DeliveryIdentity.InputHash; got != expectedHash { - t.Fatalf("expected trigger input hash %q, got %q", expectedHash, got) - } -} - -func TestTriggerStoryNilInputsUsesEmptyObject(t *testing.T) { - fakeClient := newStoryTriggerResolvingFakeClient() - c := &Client{ - Client: fakeClient, - namespace: "token-ns", + ctx := context.Background() + patchData := runsv1alpha1.StepRunStatus{ + Phase: "Succeeded", } - ctx := WithTriggerToken(context.Background(), "ctx-token") - storyName := "nil-inputs-story" - sr, err := c.TriggerStory(ctx, storyName, "", nil) + err := c.PatchStepRunStatus(ctx, "test-step-run", patchData) if err != nil { - t.Fatalf("TriggerStory() error = %v", err) - } - expected := deriveStoryRunName("token-ns", storyName, "ctx-token") - if sr == nil || sr.Name != expected { - t.Fatalf("expected StoryRun name %q, got %+v", expected, sr) + t.Fatalf("PatchStepRunStatus() failed: %v", err) } - created := singleStoryTrigger(t, fakeClient, "token-ns") - if created.Spec.Inputs == nil { - t.Fatalf("expected StoryTrigger inputs to be set") - } - if got := bytes.TrimSpace(created.Spec.Inputs.Raw); string(got) != "{}" { - t.Fatalf("expected StoryTrigger inputs to be {}, got %q", string(got)) - } - expectedHash, err := runsidentity.ComputeTriggerInputHash([]byte(`{}`)) + // Verify the patch + updatedStepRun := &runsv1alpha1.StepRun{} + err = fakeClient.Get(ctx, types.NamespacedName{Name: "test-step-run", Namespace: "test-ns"}, updatedStepRun) if err != nil { - t.Fatalf("failed to compute trigger input hash: %v", err) - } - if got := created.Spec.DeliveryIdentity.InputHash; got != expectedHash { - t.Fatalf("expected trigger input hash %q, got %q", expectedHash, got) - } -} - -func TestTriggerStoryDedupeKeyPolicy(t *testing.T) { - t.Setenv(triggerDedupeModeEnv, "key") - t.Setenv(triggerDedupeKeyTemplateEnv, "{{ inputs.eventId }}") - - fakeClient := newStoryTriggerResolvingFakeClient() - c := &Client{ - Client: fakeClient, - namespace: "token-ns", + t.Fatalf("Failed to get updated StepRun: %v", err) } - ctx := context.Background() - storyName := "policy-story" - inputs := map[string]any{"eventId": "evt-123"} - sr, err := c.TriggerStory(ctx, storyName, "", inputs) - if err != nil { - t.Fatalf("TriggerStory() error = %v", err) - } - - hash := sha256.Sum256([]byte("evt-123")) - token := hex.EncodeToString(hash[:]) - expected := deriveStoryRunName("token-ns", storyName, token) - if sr == nil || sr.Name != expected { - t.Fatalf("expected StoryRun name %q, got %+v", expected, sr) - } - - created := singleStoryTrigger(t, fakeClient, "token-ns") - inputBytes, _ := json.Marshal(inputs) - expectedHash, err := runsidentity.ComputeTriggerInputHash(inputBytes) - if err != nil { - t.Fatalf("failed to compute trigger input hash: %v", err) - } - if created.Spec.DeliveryIdentity.Mode == nil || *created.Spec.DeliveryIdentity.Mode != bobrapetv1alpha1.TriggerDedupeKey { //nolint:lll - t.Fatalf("expected key delivery identity, got %#v", created.Spec.DeliveryIdentity.Mode) - } - if got := created.Spec.DeliveryIdentity.Key; got != token { - t.Fatalf("expected delivery identity key %q, got %q", token, got) - } - if got := created.Spec.DeliveryIdentity.SubmissionID; got != token { - t.Fatalf("expected submissionID %q, got %q", token, got) - } - if got := created.Spec.DeliveryIdentity.InputHash; got != expectedHash { - t.Fatalf("expected trigger input hash %q, got %q", expectedHash, got) - } -} - -func TestTriggerStoryTokenPolicyRequiresToken(t *testing.T) { - t.Setenv(triggerDedupeModeEnv, "token") - - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource(&runsv1alpha1.StepRun{}).Build() - c := &Client{ - Client: fakeClient, - namespace: "token-ns", - } - - if _, err := c.TriggerStory(context.Background(), "token-policy-story", "", map[string]any{}); err == nil { - t.Fatal("expected error when trigger token is required but missing") - } -} - -func TestTriggerStory_RetriesAlreadyExistsGetFailureAndSucceeds(t *testing.T) { - t.Setenv(triggerRetryMaxAttemptsEnv, "2") - t.Setenv(triggerRetryBackoffEnv, "constant") - t.Setenv(triggerRetryBaseDelayEnv, "1ms") - t.Setenv(triggerRetryMaxDelayEnv, "2ms") - - const namespace = "retry-ns" - const storyName = "retry-story" - inputs := map[string]any{"value": "stable"} - inputBytes, err := json.Marshal(inputs) - if err != nil { - t.Fatalf("marshal inputs: %v", err) - } - ctx := WithTriggerToken(context.Background(), "retry-token") - storyRunName := deriveStoryRunName(namespace, storyName, "retry-token") - triggerName := runsidentity.DeriveStoryTriggerName(namespace, storyName, "retry-token", "retry-token") - storyRefNamespace := namespace - - existingRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: storyRunName, - Namespace: namespace, - Annotations: map[string]string{ - runsidentity.StoryRunTriggerRequestNameAnnotation: triggerName, - runsidentity.StoryRunTriggerTokenAnnotation: "retry-token", - }, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: storyName}, - }, - Inputs: &k8sruntime.RawExtension{Raw: inputBytes}, - }, - } - inputHash, err := runsidentity.ComputeTriggerInputHash(inputBytes) - if err != nil { - t.Fatalf("compute trigger input hash: %v", err) - } - existingRun.Annotations[runsidentity.StoryRunTriggerInputHashAnnotation] = inputHash - - existingTrigger := &runsv1alpha1.StoryTrigger{ - ObjectMeta: metav1.ObjectMeta{ - Name: triggerName, - Namespace: namespace, - }, - Spec: runsv1alpha1.StoryTriggerSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: storyName}, - }, - Inputs: &k8sruntime.RawExtension{Raw: inputBytes}, - DeliveryIdentity: runsv1alpha1.TriggerDeliveryIdentity{ - Mode: ptr.To(bobrapetv1alpha1.TriggerDedupeToken), - Key: "retry-token", - InputHash: inputHash, - SubmissionID: "retry-token", - }, - }, - Status: runsv1alpha1.StoryTriggerStatus{ - Decision: runsv1alpha1.StoryTriggerDecisionCreated, - StoryRunRef: &refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: storyRunName, Namespace: &storyRefNamespace}, - }, - }, - } - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}, &runsv1alpha1.StoryRun{}, &runsv1alpha1.StoryTrigger{}). - WithObjects(existingRun, existingTrigger). - Build() - retryingClient := &transientStoryTriggerGetClient{ - Client: baseClient, - triggerName: triggerName, - failuresLeft: 1, - } - c := &Client{ - Client: retryingClient, - namespace: namespace, - } - - got, err := c.TriggerStory(ctx, storyName, "", inputs) - if err != nil { - t.Fatalf("TriggerStory() failed: %v", err) - } - if got == nil || got.Name != storyRunName { - t.Fatalf("expected StoryRun %q, got %+v", storyRunName, got) - } - if retryingClient.failuresSeen != 1 { - t.Fatalf("expected exactly 1 transient Get failure, saw %d", retryingClient.failuresSeen) - } -} - -func TestTriggerStory_RetriesTransientCreateWithoutToken(t *testing.T) { - t.Setenv(triggerRetryMaxAttemptsEnv, "2") - t.Setenv(triggerRetryBackoffEnv, "constant") - t.Setenv(triggerRetryBaseDelayEnv, "1ms") - t.Setenv(triggerRetryMaxDelayEnv, "2ms") - - baseClient := newStoryTriggerResolvingFakeClient() - retryingClient := &transientCreateClient{ - Client: baseClient, - createErrors: []error{apierrors.NewTimeoutError("simulated transient create timeout", 1)}, - } - c := &Client{ - Client: retryingClient, - namespace: "retry-ns", - } - - got, err := c.TriggerStory(context.Background(), "untokened-story", "", map[string]any{"value": "once"}) - if err != nil { - t.Fatalf("expected retry to recover for untokened StoryTrigger create, got %v", err) - } - if got == nil { - t.Fatal("expected resolved StoryRun after retry") - } - if retryingClient.createCalls != 2 { - t.Fatalf("expected untokened StoryTrigger create to retry once, got %d calls", retryingClient.createCalls) - } -} - -func TestTriggerStory_RetriesTransientCreateWhenTokenMakesRequestIdempotent(t *testing.T) { - t.Setenv(triggerRetryMaxAttemptsEnv, "2") - t.Setenv(triggerRetryBackoffEnv, "constant") - t.Setenv(triggerRetryBaseDelayEnv, "1ms") - t.Setenv(triggerRetryMaxDelayEnv, "2ms") - - const namespace = "retry-ns" - const storyName = "tokened-retry-story" - ctx := WithTriggerToken(context.Background(), "retry-token") - - baseClient := newStoryTriggerResolvingFakeClient() - retryingClient := &transientCreateClient{ - Client: baseClient, - createErrors: []error{apierrors.NewTimeoutError("simulated transient create timeout", 1)}, - } - c := &Client{ - Client: retryingClient, - namespace: namespace, - } - - got, err := c.TriggerStory(ctx, storyName, "", map[string]any{"value": "stable"}) - if err != nil { - t.Fatalf("expected retry to recover for idempotent create, got %v", err) - } - expectedName := deriveStoryRunName(namespace, storyName, "retry-token") - if got == nil || got.Name != expectedName { - t.Fatalf("expected StoryRun %q after retry, got %+v", expectedName, got) - } - if retryingClient.createCalls != 2 { - t.Fatalf("expected tokened trigger create to retry once, got %d calls", retryingClient.createCalls) - } - - var created runsv1alpha1.StoryRun //nolint:staticcheck - //nolint:lll,staticcheck - if err := retryingClient.Client.Get(ctx, types.NamespacedName{Name: expectedName, Namespace: namespace}, &created); err != nil { - t.Fatalf("failed to fetch retried StoryRun: %v", err) - } -} - -func TestTriggerStory_RetryExhaustedOnAlreadyExistsGetFailure(t *testing.T) { - t.Setenv(triggerRetryMaxAttemptsEnv, "2") - t.Setenv(triggerRetryBackoffEnv, "constant") - t.Setenv(triggerRetryBaseDelayEnv, "1ms") - t.Setenv(triggerRetryMaxDelayEnv, "2ms") - - const namespace = "retry-ns" - const storyName = "retry-story-exhausted" - inputs := map[string]any{"value": "stable"} - inputBytes, err := json.Marshal(inputs) - if err != nil { - t.Fatalf("marshal inputs: %v", err) - } - ctx := WithTriggerToken(context.Background(), "retry-token-exhausted") - storyRunName := deriveStoryRunName(namespace, storyName, "retry-token-exhausted") - triggerName := runsidentity.DeriveStoryTriggerName(namespace, storyName, "retry-token-exhausted", "retry-token-exhausted") //nolint:lll - storyRefNamespace := namespace - - existingRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: storyRunName, - Namespace: namespace, - Annotations: map[string]string{ - runsidentity.StoryRunTriggerRequestNameAnnotation: triggerName, - runsidentity.StoryRunTriggerTokenAnnotation: "retry-token-exhausted", - }, - }, - Spec: runsv1alpha1.StoryRunSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: storyName}, - }, - Inputs: &k8sruntime.RawExtension{Raw: inputBytes}, - }, - } - inputHash, err := runsidentity.ComputeTriggerInputHash(inputBytes) - if err != nil { - t.Fatalf("compute trigger input hash: %v", err) - } - existingRun.Annotations[runsidentity.StoryRunTriggerInputHashAnnotation] = inputHash - - existingTrigger := &runsv1alpha1.StoryTrigger{ - ObjectMeta: metav1.ObjectMeta{ - Name: triggerName, - Namespace: namespace, - }, - Spec: runsv1alpha1.StoryTriggerSpec{ - StoryRef: refs.StoryReference{ - ObjectReference: refs.ObjectReference{Name: storyName}, - }, - Inputs: &k8sruntime.RawExtension{Raw: inputBytes}, - DeliveryIdentity: runsv1alpha1.TriggerDeliveryIdentity{ - Mode: ptr.To(bobrapetv1alpha1.TriggerDedupeToken), - Key: "retry-token-exhausted", - InputHash: inputHash, - SubmissionID: "retry-token-exhausted", - }, - }, - Status: runsv1alpha1.StoryTriggerStatus{ - Decision: runsv1alpha1.StoryTriggerDecisionCreated, - StoryRunRef: &refs.StoryRunReference{ - ObjectReference: refs.ObjectReference{Name: storyRunName, Namespace: &storyRefNamespace}, - }, - }, - } - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}, &runsv1alpha1.StoryRun{}, &runsv1alpha1.StoryTrigger{}). - WithObjects(existingRun, existingTrigger). - Build() - retryingClient := &transientStoryTriggerGetClient{ - Client: baseClient, - triggerName: triggerName, - failuresLeft: 2, // exhaust both attempts - } - c := &Client{ - Client: retryingClient, - namespace: namespace, - } - - got, err := c.TriggerStory(ctx, storyName, "", inputs) - if err == nil { - t.Fatal("expected retry exhaustion error, got nil") - } - if got != nil { - t.Fatalf("expected nil StoryRun on error, got %+v", got) - } - if !errors.Is(err, sdkerrors.ErrRetryable) { - t.Fatalf("expected retryable error, got %v", err) - } - if retryingClient.failuresSeen != 2 { - t.Fatalf("expected 2 transient Get failures, saw %d", retryingClient.failuresSeen) - } -} - -func TestStopStoryRun_RequestsGracefulCancelForRunningStoryRun(t *testing.T) { - startedAt := metav1.NewTime(time.Now().Add(-95 * time.Second).UTC().Truncate(time.Second)) - storyRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "storyrun-running", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StoryRunStatus{ - Phase: enums.PhaseRunning, - StartedAt: &startedAt, - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StoryRun{}). - WithObjects(storyRun). - Build() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - if err := c.StopStoryRun(context.Background(), "storyrun-running", ""); err != nil { - t.Fatalf("StopStoryRun() failed: %v", err) - } - - var updated runsv1alpha1.StoryRun - if err := fakeClient.Get(context.Background(), types.NamespacedName{ - Name: "storyrun-running", - Namespace: "test-ns", - }, &updated); err != nil { - t.Fatalf("failed to get updated StoryRun: %v", err) - } - if updated.Status.Phase != enums.PhaseRunning { - t.Fatalf("StoryRun phase = %s, want %s", updated.Status.Phase, enums.PhaseRunning) - } - if updated.Status.Message != "" { - t.Fatalf("StoryRun message = %q, want empty", updated.Status.Message) - } - if updated.Status.FinishedAt != nil { - t.Fatalf("expected FinishedAt to remain nil, got %#v", updated.Status.FinishedAt) - } - if updated.Status.StartedAt == nil || !updated.Status.StartedAt.Time.Equal(startedAt.Time) { - t.Fatalf("StartedAt = %#v, want %s", updated.Status.StartedAt, startedAt.Time) - } - if updated.Status.Duration != "" { - t.Fatalf("expected Duration to remain empty, got %q", updated.Status.Duration) - } - if updated.Spec.CancelRequested == nil || !*updated.Spec.CancelRequested { - t.Fatalf("expected CancelRequested=true, got %#v", updated.Spec.CancelRequested) - } -} - -func TestStopStoryRun_TerminalPhaseNoop(t *testing.T) { - finishedAt := metav1.NewTime(time.Now().Add(-5 * time.Second).UTC().Truncate(time.Second)) - storyRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "storyrun-terminal", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StoryRunStatus{ - Phase: enums.PhaseSucceeded, - Message: "already finished", - FinishedAt: &finishedAt, - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StoryRun{}). - WithObjects(storyRun). - Build() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - if err := c.StopStoryRun(context.Background(), "storyrun-terminal", ""); err != nil { - t.Fatalf("StopStoryRun() failed: %v", err) - } - - var updated runsv1alpha1.StoryRun - if err := fakeClient.Get(context.Background(), types.NamespacedName{ - Name: "storyrun-terminal", - Namespace: "test-ns", - }, &updated); err != nil { - t.Fatalf("failed to get updated StoryRun: %v", err) - } - if updated.Status.Phase != enums.PhaseSucceeded { - t.Fatalf("StoryRun phase = %s, want %s", updated.Status.Phase, enums.PhaseSucceeded) - } - if updated.Status.Message != "already finished" { - t.Fatalf("StoryRun message = %q, want %q", updated.Status.Message, "already finished") - } - if updated.Status.FinishedAt == nil || !updated.Status.FinishedAt.Time.Equal(finishedAt.Time) { - t.Fatalf("FinishedAt = %#v, want %s", updated.Status.FinishedAt, finishedAt.Time) - } -} - -func TestStopStoryRun_RequestsGracefulCancelForPausedStoryRun(t *testing.T) { - storyRun := &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "storyrun-paused", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StoryRunStatus{ - Phase: enums.PhasePaused, - Message: "paused by controller", - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StoryRun{}). - WithObjects(storyRun). - Build() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - if err := c.StopStoryRun(context.Background(), "storyrun-paused", ""); err != nil { - t.Fatalf("StopStoryRun() failed: %v", err) - } - - var updated runsv1alpha1.StoryRun - if err := fakeClient.Get(context.Background(), types.NamespacedName{ - Name: "storyrun-paused", - Namespace: "test-ns", - }, &updated); err != nil { - t.Fatalf("failed to get StoryRun after unsupported stop attempt: %v", err) - } - if updated.Status.Phase != enums.PhasePaused { - t.Fatalf("StoryRun phase = %s, want %s", updated.Status.Phase, enums.PhasePaused) - } - if updated.Status.Message != "paused by controller" { - t.Fatalf("StoryRun message = %q, want %q", updated.Status.Message, "paused by controller") - } - if updated.Status.FinishedAt != nil { - t.Fatalf("expected FinishedAt to remain nil, got %#v", updated.Status.FinishedAt) - } - if updated.Spec.CancelRequested == nil || !*updated.Spec.CancelRequested { - t.Fatalf("expected CancelRequested=true, got %#v", updated.Spec.CancelRequested) - } -} - -func TestStopStoryRun_NotFound(t *testing.T) { - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StoryRun{}). - Build() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - err := c.StopStoryRun(context.Background(), "missing-storyrun", "") - if err == nil { - t.Fatal("expected not-found error") - } - if !errors.Is(err, sdkerrors.ErrNotFound) { - t.Fatalf("expected errors.Is(err, ErrNotFound), got %v", err) - } -} - -func TestUpdateImpulseTriggerStats_AppliesDelta(t *testing.T) { - lastTrigger := time.Date(2026, time.March, 27, 10, 0, 0, 0, time.FixedZone("UTC+2", 2*60*60)) - lastSuccess := lastTrigger.Add(2 * time.Minute) - lastThrottled := lastTrigger.Add(3 * time.Minute) - impulse := &bobrapetv1alpha1.Impulse{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-impulse", - Namespace: "test-ns", - Generation: 7, - }, - Status: bobrapetv1alpha1.ImpulseStatus{ - TriggersReceived: 10, - StoriesLaunched: 3, - FailedTriggers: 1, - ThrottledTriggers: 2, - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&bobrapetv1alpha1.Impulse{}). - WithObjects(impulse). - Build() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - err := c.UpdateImpulseTriggerStats(context.Background(), "test-impulse", "", TriggerStatsDelta{ - TriggersReceived: 2, - StoriesLaunched: 1, - FailedTriggers: 1, - ThrottledTriggers: 3, - LastTrigger: lastTrigger, - LastSuccess: &lastSuccess, - LastThrottled: &lastThrottled, - }) - if err != nil { - t.Fatalf("UpdateImpulseTriggerStats() failed: %v", err) - } - - var updated bobrapetv1alpha1.Impulse - if err := fakeClient.Get(context.Background(), types.NamespacedName{ - Name: "test-impulse", - Namespace: "test-ns", - }, &updated); err != nil { - t.Fatalf("failed to get updated Impulse: %v", err) - } - if updated.Status.ObservedGeneration != 7 { - t.Fatalf("ObservedGeneration = %d, want 7", updated.Status.ObservedGeneration) - } - if updated.Status.TriggersReceived != 12 { - t.Fatalf("TriggersReceived = %d, want 12", updated.Status.TriggersReceived) - } - if updated.Status.StoriesLaunched != 4 { - t.Fatalf("StoriesLaunched = %d, want 4", updated.Status.StoriesLaunched) - } - if updated.Status.FailedTriggers != 2 { - t.Fatalf("FailedTriggers = %d, want 2", updated.Status.FailedTriggers) - } - if updated.Status.ThrottledTriggers != 5 { - t.Fatalf("ThrottledTriggers = %d, want 5", updated.Status.ThrottledTriggers) - } - if updated.Status.LastTrigger == nil || !updated.Status.LastTrigger.Time.Equal(lastTrigger.UTC()) { - t.Fatalf("LastTrigger = %#v, want %s", updated.Status.LastTrigger, lastTrigger.UTC()) - } - if updated.Status.LastSuccess == nil || !updated.Status.LastSuccess.Time.Equal(lastSuccess.UTC()) { - t.Fatalf("LastSuccess = %#v, want %s", updated.Status.LastSuccess, lastSuccess.UTC()) - } - if updated.Status.LastThrottled == nil || !updated.Status.LastThrottled.Time.Equal(lastThrottled.UTC()) { - t.Fatalf("LastThrottled = %#v, want %s", updated.Status.LastThrottled, lastThrottled.UTC()) - } -} - -func TestUpdateImpulseTriggerStats_NilContextRejected(t *testing.T) { - c := &Client{namespace: "test-ns"} - err := c.UpdateImpulseTriggerStats(nil, "test-impulse", "", TriggerStatsDelta{ //nolint:staticcheck - TriggersReceived: 1, - }) - if err == nil { - t.Fatal("expected nil context to be rejected") - } - if !strings.Contains(err.Error(), "context must not be nil") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestUpdateImpulseTriggerStats_EmptyImpulseNameRejected(t *testing.T) { - c := &Client{namespace: "test-ns"} - err := c.UpdateImpulseTriggerStats(context.Background(), " ", "", TriggerStatsDelta{ - TriggersReceived: 1, - }) - if err == nil { - t.Fatal("expected empty impulse name to be rejected") - } - if !strings.Contains(err.Error(), "impulse name is required") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestUpdateImpulseTriggerStats_NegativeDeltaRejected(t *testing.T) { - c := &Client{namespace: "test-ns"} - err := c.UpdateImpulseTriggerStats(context.Background(), "test-impulse", "", TriggerStatsDelta{ - TriggersReceived: -1, - }) - if err == nil { - t.Fatal("expected negative delta to be rejected") - } - if !strings.Contains(err.Error(), "TriggersReceived must not be negative") { - t.Fatalf("unexpected error: %v", err) - } -} - -func storageRefFromStoryRun(t *testing.T, sr *runsv1alpha1.StoryRun) string { - t.Helper() - if sr == nil || sr.Spec.Inputs == nil { - t.Fatalf("story run inputs missing: %+v", sr) - } - var spec map[string]any - if err := json.Unmarshal(sr.Spec.Inputs.Raw, &spec); err != nil { - t.Fatalf("failed to unmarshal story inputs: %v", err) - } - if raw, exists := spec["$bubuStorageRef"]; exists { - ref, ok := raw.(string) - if !ok { - t.Fatalf("storage reference expected string, got %T", raw) - } - return ref - } - for _, v := range spec { - if nested, ok := v.(map[string]any); ok { - if raw, exists := nested["$bubuStorageRef"]; exists { - ref, ok := raw.(string) - if !ok { - t.Fatalf("storage reference expected string, got %T", raw) - } - return ref - } - } - } - t.Fatalf("expected storage reference in inputs, got %v", spec) - return "" -} - -func TestPatchStepRunStatus(t *testing.T) { - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run", - Namespace: "test-ns", - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun).Build() - - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - ctx := context.Background() - patchData := runsv1alpha1.StepRunStatus{ - Phase: "Succeeded", - } - - err := c.PatchStepRunStatus(ctx, "test-step-run", patchData) - if err != nil { - t.Fatalf("PatchStepRunStatus() failed: %v", err) - } - - // Verify the patch - updatedStepRun := &runsv1alpha1.StepRun{} - err = fakeClient.Get(ctx, types.NamespacedName{Name: "test-step-run", Namespace: "test-ns"}, updatedStepRun) - if err != nil { - t.Fatalf("Failed to get updated StepRun: %v", err) - } - - if updatedStepRun.Status.Phase != "Succeeded" { - t.Errorf("StepRun status phase = %v, want 'Succeeded'", updatedStepRun.Status.Phase) - } -} - -func TestGetMaxPatchRetries_ClampsNegativeValues(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "-3") - if got := getMaxPatchRetries(); got != 0 { - t.Fatalf("expected negative retry config to clamp to 0, got %d", got) - } -} - -func TestPatchStepRunStatus_NilContextRejected(t *testing.T) { - c := &Client{} //nolint:staticcheck - err := c.PatchStepRunStatus(nil, "test-step-run", runsv1alpha1.StepRunStatus{}) //nolint:staticcheck - if err == nil { - t.Fatal("expected nil context to be rejected") - } - if !strings.Contains(err.Error(), "context must not be nil") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestPatchStepRunStatus_NegativeRetryConfigStillAttemptsInitialPatch(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "-1") - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-negative-retries", - Namespace: "test-ns", - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun).Build() - - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-negative-retries", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseSucceeded, - }) - if err != nil { - t.Fatalf("PatchStepRunStatus() failed: %v", err) - } - - updatedStepRun := &runsv1alpha1.StepRun{} - err = fakeClient.Get(context.Background(), types.NamespacedName{ - Name: "test-step-run-negative-retries", - Namespace: "test-ns", - }, updatedStepRun) - if err != nil { - t.Fatalf("Failed to get updated StepRun: %v", err) - } - if updatedStepRun.Status.Phase != enums.PhaseSucceeded { - t.Fatalf("StepRun status phase = %v, want %v", updatedStepRun.Status.Phase, enums.PhaseSucceeded) - } -} - -func TestPatchStepRunStatus_RetriesTransientGetFailure(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "1") - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-transient-get", - Namespace: "test-ns", - }, - } - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - - retryingClient := &transientGetClient{ - Client: baseClient, - getErrors: []error{apierrors.NewTimeoutError("temporary timeout", 1)}, - } - c := &Client{ - Client: retryingClient, - namespace: "test-ns", - } - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-transient-get", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseSucceeded, - }) - if err != nil { - t.Fatalf("PatchStepRunStatus() failed after transient Get error: %v", err) - } - if retryingClient.getCalls != 2 { - t.Fatalf("expected 2 Get calls, got %d", retryingClient.getCalls) - } - - updatedStepRun := &runsv1alpha1.StepRun{} - err = baseClient.Get(context.Background(), types.NamespacedName{ - Name: "test-step-run-transient-get", - Namespace: "test-ns", - }, updatedStepRun) - if err != nil { - t.Fatalf("Failed to get updated StepRun: %v", err) - } - if updatedStepRun.Status.Phase != enums.PhaseSucceeded { - t.Fatalf("StepRun status phase = %v, want %v", updatedStepRun.Status.Phase, enums.PhaseSucceeded) - } -} - -func TestPatchStepRunStatus_TransientGetFailureReturnsRetryableAfterExhaustion(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "1") - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-transient-get-fail", - Namespace: "test-ns", - }, - } - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - - retryingClient := &transientGetClient{ - Client: baseClient, - getErrors: []error{ - apierrors.NewTimeoutError("temporary timeout", 1), - apierrors.NewTimeoutError("temporary timeout again", 1), - }, - } - c := &Client{ - Client: retryingClient, - namespace: "test-ns", - } - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-transient-get-fail", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseSucceeded, - }) - if err == nil { - t.Fatal("expected transient Get error after retry exhaustion") - } - if !errors.Is(err, sdkerrors.ErrRetryable) { - t.Fatalf("expected retryable error after retry exhaustion, got %v", err) - } - if retryingClient.getCalls != 2 { - t.Fatalf("expected 2 Get calls, got %d", retryingClient.getCalls) - } -} - -func TestPatchStepRunStatus_CancelledContextAbortsRetryBackoff(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "1") - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - Build() - c := &Client{ - Client: &timeoutGetClient{Client: baseClient}, - namespace: "test-ns", - } - - ctx, cancel := context.WithCancel(context.Background()) - go func() { - time.Sleep(10 * time.Millisecond) - cancel() - }() - - err := c.PatchStepRunStatus(ctx, "test-step-run-cancel-backoff", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseRunning, - }) - if err == nil { - t.Fatal("expected context cancellation error during retry backoff") - } - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled sentinel, got %v", err) - } - if !strings.Contains(err.Error(), "status patch retry aborted due to context cancellation") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestPatchStepRunStatus_RetriesConflictOnStatusPatch(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "1") - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-conflict-retry", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StepRunStatus{ - Phase: enums.PhasePending, - }, - } - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - - conflictErr := apierrors.NewConflict( - schema.GroupResource{Group: "runs.bubustack.io", Resource: "stepruns"}, - "test-step-run-conflict-retry", - errors.New("simulated conflict"), - ) - statusWriter := &transientStatusWriter{ - SubResourceWriter: baseClient.Status(), - patchErrors: []error{conflictErr}, - } - retryingClient := &transientStatusPatchClient{ - Client: baseClient, - statusWriter: statusWriter, - } - c := &Client{ - Client: retryingClient, - namespace: "test-ns", - } - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-conflict-retry", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseRunning, - }) - if err != nil { - t.Fatalf("PatchStepRunStatus() failed after transient conflict: %v", err) - } - if statusWriter.patchCalls != 2 { - t.Fatalf("expected 2 status patch calls, got %d", statusWriter.patchCalls) - } - - updated := &runsv1alpha1.StepRun{} - err = baseClient.Get(context.Background(), types.NamespacedName{ - Name: "test-step-run-conflict-retry", - Namespace: "test-ns", - }, updated) - if err != nil { - t.Fatalf("Failed to get updated StepRun: %v", err) - } - if updated.Status.Phase != enums.PhaseRunning { - t.Fatalf("StepRun status phase = %v, want %v", updated.Status.Phase, enums.PhaseRunning) - } -} - -func TestPatchStepRunStatus_ConflictReturnsErrConflictAfterExhaustion(t *testing.T) { - t.Setenv(contracts.K8sPatchMaxRetriesEnv, "0") - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-conflict-exhaustion", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StepRunStatus{ - Phase: enums.PhasePending, - }, - } - - baseClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - - conflictErr := apierrors.NewConflict( - schema.GroupResource{Group: "runs.bubustack.io", Resource: "stepruns"}, - "test-step-run-conflict-exhaustion", - errors.New("simulated conflict"), - ) - statusWriter := &transientStatusWriter{ - SubResourceWriter: baseClient.Status(), - patchErrors: []error{conflictErr}, - } - retryingClient := &transientStatusPatchClient{ - Client: baseClient, - statusWriter: statusWriter, - } - c := &Client{ - Client: retryingClient, - namespace: "test-ns", - } - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-conflict-exhaustion", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseRunning, - }) - if err == nil { - t.Fatal("expected conflict error after retry exhaustion") - } - if !errors.Is(err, sdkerrors.ErrConflict) { - t.Fatalf("expected ErrConflict sentinel, got %v", err) - } - if statusWriter.patchCalls != 1 { - t.Fatalf("expected 1 status patch call, got %d", statusWriter.patchCalls) - } -} - -func TestPatchStepRunStatus_ZombiePodRejected(t *testing.T) { - t.Setenv(contracts.PodNameEnv, "old-pod") - - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-zombie", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StepRunStatus{ - PodName: "current-pod", - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun).Build() - - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - ctx := context.Background() - err := c.PatchStepRunStatus(ctx, "test-step-run-zombie", runsv1alpha1.StepRunStatus{Phase: "Failed"}) - if err == nil { - t.Fatal("expected zombie pod fencing to reject the patch, got nil error") - } - if !strings.Contains(err.Error(), "zombie pod fencing") { - t.Errorf("expected 'zombie pod fencing' in error, got: %v", err) - } -} - -func TestIsValidPhaseTransition_AllowsRunningToTimeout(t *testing.T) { - if !isValidPhaseTransition(enums.PhaseRunning, enums.PhaseTimeout) { - t.Fatalf("expected Running → Timeout to be valid") - } -} - -func TestMergeStepRunStatus_AllowsTimeoutTransition(t *testing.T) { - existing := &runsv1alpha1.StepRunStatus{Phase: enums.PhaseRunning} - incoming := &runsv1alpha1.StepRunStatus{Phase: enums.PhaseTimeout} - - merged, err := mergeStepRunStatus(existing, incoming) - if err != nil { - t.Fatalf("mergeStepRunStatus() error = %v", err) - } - if merged.Phase != enums.PhaseTimeout { - t.Fatalf("merged phase = %s, want %s", merged.Phase, enums.PhaseTimeout) - } -} - -func TestIsValidPhaseTransition_NewTransitions(t *testing.T) { - tests := []struct { - from, to enums.Phase - want bool - }{ - {enums.PhasePending, enums.PhaseTimeout, true}, - {enums.PhaseRunning, enums.PhaseAborted, true}, - {enums.PhaseScheduling, enums.PhaseCanceled, true}, - {enums.PhaseBlocked, enums.PhaseCanceled, true}, - // Failed → Succeeded: SDK completed work after controller set Failed - {enums.PhaseFailed, enums.PhaseSucceeded, true}, - // Failed → Running: controller retry - {enums.PhaseFailed, enums.PhaseRunning, true}, - // Terminal states should not allow transitions - {enums.PhaseFinished, enums.PhaseRunning, false}, - {enums.PhaseSkipped, enums.PhaseRunning, false}, - } - for _, tt := range tests { - got := isValidPhaseTransition(tt.from, tt.to) - if got != tt.want { - t.Errorf("isValidPhaseTransition(%v→%v) = %v, want %v", tt.from, tt.to, got, tt.want) - } - } -} - -func TestPatchStepRunStatus_InvalidTransitionNoRetry(t *testing.T) { - // StepRun is in Succeeded (terminal) — any further phase transition is invalid - stepRun := &runsv1alpha1.StepRun{} - stepRun.Name = "test-step-run-invalid-transition" - stepRun.Namespace = "test-ns" - stepRun.Status.Phase = enums.PhaseSucceeded - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - c := &Client{Client: fakeClient, namespace: "test-ns"} - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-invalid-transition", - runsv1alpha1.StepRunStatus{Phase: enums.PhaseRunning}) - if err == nil { - t.Fatal("expected ErrInvalidTransition, got nil") - } - if !errors.Is(err, sdkerrors.ErrInvalidTransition) { - t.Errorf("expected errors.Is(err, ErrInvalidTransition), got: %v", err) - } -} - -func TestMergeStepRunStatus_AllowsFailedToSucceeded(t *testing.T) { - existing := &runsv1alpha1.StepRunStatus{Phase: enums.PhaseFailed} - incoming := &runsv1alpha1.StepRunStatus{Phase: enums.PhaseSucceeded} - - merged, err := mergeStepRunStatus(existing, incoming) - if err != nil { - t.Fatalf("mergeStepRunStatus(Failed→Succeeded) error = %v", err) - } - if merged.Phase != enums.PhaseSucceeded { - t.Fatalf("merged phase = %s, want %s", merged.Phase, enums.PhaseSucceeded) - } -} - -func TestPatchStepRunStatus_FailedToSucceeded(t *testing.T) { - stepRun := &runsv1alpha1.StepRun{} - stepRun.Name = "test-step-run-late-success" - stepRun.Namespace = "test-ns" - stepRun.Status.Phase = enums.PhaseFailed - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - c := &Client{Client: fakeClient, namespace: "test-ns"} - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-late-success", - runsv1alpha1.StepRunStatus{Phase: enums.PhaseSucceeded}) - if err != nil { - t.Fatalf("PatchStepRunStatus(Failed→Succeeded) should succeed, got: %v", err) - } - - var updated runsv1alpha1.StepRun - if err := fakeClient.Get(context.Background(), - types.NamespacedName{Name: "test-step-run-late-success", Namespace: "test-ns"}, - &updated); err != nil { - t.Fatalf("failed to get updated StepRun: %v", err) - } - if updated.Status.Phase != enums.PhaseSucceeded { - t.Fatalf("expected phase Succeeded after patch, got %s", updated.Status.Phase) - } -} - -func TestMergeStepRunStatus_PreservesFieldsWhenIncomingOmitsThem(t *testing.T) { - existing := &runsv1alpha1.StepRunStatus{ - Output: &k8sruntime.RawExtension{Raw: []byte(`"old"`)}, - Logs: &k8sruntime.RawExtension{Raw: []byte(`"logs"`)}, - Error: &runsv1alpha1.StructuredError{Message: "err"}, - Needs: []string{"prior-step"}, - } - incoming := &runsv1alpha1.StepRunStatus{} - - merged, err := mergeStepRunStatus(existing, incoming) - if err != nil { - t.Fatalf("mergeStepRunStatus() error = %v", err) - } - if merged.Output == nil || string(merged.Output.Raw) != `"old"` { - t.Fatalf("expected Output to be preserved, got %v", merged.Output) - } - if merged.Logs == nil || string(merged.Logs.Raw) != `"logs"` { - t.Fatalf("expected Logs to be preserved, got %v", merged.Logs) - } - if merged.Error == nil || merged.Error.Message != "err" { - t.Fatalf("expected Error to be preserved, got %v", merged.Error) - } - if !reflect.DeepEqual(merged.Needs, []string{"prior-step"}) { - t.Fatalf("expected Needs to be preserved, got %v", merged.Needs) - } -} - -func TestMergeStepRunStatus_ExplicitEmptyNeedsClearsExistingNeeds(t *testing.T) { - existing := &runsv1alpha1.StepRunStatus{ - Needs: []string{"prior-step"}, - } - incoming := &runsv1alpha1.StepRunStatus{ - Needs: []string{}, - } - - merged, err := mergeStepRunStatus(existing, incoming) - if err != nil { - t.Fatalf("mergeStepRunStatus() error = %v", err) - } - if merged.Needs == nil { - t.Fatal("expected explicit empty Needs slice to be preserved, got nil") - } - if len(merged.Needs) != 0 { - t.Fatalf("expected Needs to be cleared, got %v", merged.Needs) - } -} - -func TestPatchStepRunStatus_PreservesNeedsWhenIncomingOmitsThem(t *testing.T) { - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step-run-preserve-needs", - Namespace: "test-ns", - }, - Status: runsv1alpha1.StepRunStatus{ - Phase: enums.PhasePending, - Needs: []string{"prior-step"}, - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithStatusSubresource(&runsv1alpha1.StepRun{}). - WithObjects(stepRun). - Build() - c := &Client{ - Client: fakeClient, - namespace: "test-ns", - } - - err := c.PatchStepRunStatus(context.Background(), "test-step-run-preserve-needs", runsv1alpha1.StepRunStatus{ - Phase: enums.PhaseRunning, - }) - if err != nil { - t.Fatalf("PatchStepRunStatus() failed: %v", err) - } - - updated := &runsv1alpha1.StepRun{} - err = fakeClient.Get(context.Background(), types.NamespacedName{ - Name: "test-step-run-preserve-needs", - Namespace: "test-ns", - }, updated) - if err != nil { - t.Fatalf("Failed to get updated StepRun: %v", err) - } - if updated.Status.Phase != enums.PhaseRunning { - t.Fatalf("StepRun status phase = %v, want %v", updated.Status.Phase, enums.PhaseRunning) - } - if !reflect.DeepEqual(updated.Status.Needs, []string{"prior-step"}) { - t.Fatalf("expected Needs to be preserved, got %v", updated.Status.Needs) - } -} - -func TestMergeStepRunStatus_DedupesEffectsByKeyWhenSeqZero(t *testing.T) { - existing := &runsv1alpha1.StepRunStatus{ - Effects: []runsv1alpha1.EffectRecord{ - {Seq: 1, Key: "effect-1", Status: "succeeded"}, - }, - } - incoming := &runsv1alpha1.StepRunStatus{ - Effects: []runsv1alpha1.EffectRecord{ - {Seq: 0, Key: "effect-1", Status: "succeeded"}, - {Seq: 0, Key: "effect-2", Status: "succeeded"}, - }, - } - - merged, err := mergeStepRunStatus(existing, incoming) - if err != nil { - t.Fatalf("mergeStepRunStatus() error = %v", err) - } - if len(merged.Effects) != 2 { - t.Fatalf("expected 2 effects after merge, got %d", len(merged.Effects)) - } - if merged.Effects[0].Key != "effect-1" { - t.Fatalf("expected existing effect to remain first, got %q", merged.Effects[0].Key) - } - if merged.Effects[0].Seq != 1 { - t.Fatalf("expected existing effect seq to remain 1, got %d", merged.Effects[0].Seq) - } - if merged.Effects[1].Key != "effect-2" { - t.Fatalf("expected new effect to be appended, got %q", merged.Effects[1].Key) - } -} - -func TestClassifyK8sError(t *testing.T) { - conflict := apierrors.NewConflict( - schema.GroupResource{Group: "runs.bubustack.io", Resource: "storyruns"}, - "example", - errors.New("conflict"), - ) - if err := classifyK8sError(conflict); !errors.Is(err, sdkerrors.ErrConflict) { - t.Fatalf("expected ErrConflict sentinel, got %v", err) - } - - timeout := apierrors.NewTimeoutError("timed out", 1) - if err := classifyK8sError(timeout); !errors.Is(err, sdkerrors.ErrRetryable) { - t.Fatalf("expected ErrRetryable for timeout, got %v", err) - } - - if err := classifyK8sError(nil); err != nil { - t.Fatalf("expected nil passthrough, got %v", err) + if updatedStepRun.Status.Phase != "Succeeded" { + t.Errorf("StepRun status phase = %v, want 'Succeeded'", updatedStepRun.Status.Phase) } } diff --git a/k8s/doc.go b/k8s/doc.go deleted file mode 100644 index 732a46d..0000000 --- a/k8s/doc.go +++ /dev/null @@ -1,16 +0,0 @@ -/* -Package k8s contains the SDK's controller-runtime client helpers for StoryRun, -StepRun, and Impulse operations. - -Consumers using these helpers need Kubernetes authentication plus RBAC that -matches the methods they call. The common SDK paths require at least: - - - StoryRun launch and lookup: `storytriggers` `create`/`get` and `storyruns` `get` - - StoryRun stop/status updates: `storyruns` `get` and `storyruns/status` `patch` - - StepRun status updates: `stepruns` `get` and `stepruns/status` `patch` - - Impulse trigger counters: `impulses` `get` and `impulses/status` `patch` - -Additional controller or application features may require broader read access, -but the list above covers the baseline SDK operations exposed by this package. -*/ -package k8s diff --git a/k8s/impulse_stats.go b/k8s/impulse_stats.go deleted file mode 100644 index f4e284e..0000000 --- a/k8s/impulse_stats.go +++ /dev/null @@ -1,130 +0,0 @@ -package k8s - -import ( - "context" - "fmt" - "strings" - "time" - - v1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -// TriggerStatsDelta describes the per-trigger counters to apply to an Impulse status. -type TriggerStatsDelta struct { - // TriggersReceived increments the total number of trigger attempts observed by the impulse. - TriggersReceived int64 - // StoriesLaunched increments the total number of StoryRuns successfully created. - StoriesLaunched int64 - // FailedTriggers increments the total number of trigger attempts that failed before launch. - FailedTriggers int64 - // ThrottledTriggers increments the total number of triggers rejected by throttling policy. - ThrottledTriggers int64 - // LastTrigger records when the most recent trigger attempt was received. - LastTrigger time.Time - // LastSuccess records when the most recent successful StoryRun launch completed. - LastSuccess *time.Time - // LastThrottled records when the most recent trigger was throttled. - LastThrottled *time.Time -} - -func (d TriggerStatsDelta) isZero() bool { - lastSuccessEmpty := d.LastSuccess == nil || d.LastSuccess.IsZero() - lastThrottledEmpty := d.LastThrottled == nil || d.LastThrottled.IsZero() - return d.TriggersReceived == 0 && - d.StoriesLaunched == 0 && - d.FailedTriggers == 0 && - d.ThrottledTriggers == 0 && - d.LastTrigger.IsZero() && - lastSuccessEmpty && - lastThrottledEmpty -} - -func (d TriggerStatsDelta) validate() error { - switch { - case d.TriggersReceived < 0: - return fmt.Errorf("trigger stats delta TriggersReceived must not be negative") - case d.StoriesLaunched < 0: - return fmt.Errorf("trigger stats delta StoriesLaunched must not be negative") - case d.FailedTriggers < 0: - return fmt.Errorf("trigger stats delta FailedTriggers must not be negative") - case d.ThrottledTriggers < 0: - return fmt.Errorf("trigger stats delta ThrottledTriggers must not be negative") - default: - return nil - } -} - -// UpdateImpulseTriggerStats applies the provided delta to the impulse status counters. -// Callers need RBAC for `impulses` `get` and `impulses/status` `patch`. -// Uses retry-on-conflict to avoid lost updates when the controller patches the same -// Impulse status concurrently. -func (c *Client) UpdateImpulseTriggerStats( - ctx context.Context, - impulseName string, - namespace string, - delta TriggerStatsDelta, -) error { - if ctx == nil { - return fmt.Errorf("context must not be nil") - } - impulseName = strings.TrimSpace(impulseName) - if impulseName == "" { - return fmt.Errorf("impulse name is required") - } - if err := delta.validate(); err != nil { - return err - } - if delta.isZero() { - return nil - } - - targetNamespace := strings.TrimSpace(namespace) - if targetNamespace == "" { - targetNamespace = c.namespace - } - if targetNamespace == "" { - return fmt.Errorf("failed to resolve namespace for impulse %q", impulseName) - } - - key := types.NamespacedName{Name: impulseName, Namespace: targetNamespace} - return retry.RetryOnConflict(retry.DefaultBackoff, func() error { - apiCtx, cancel := context.WithTimeout(ctx, getOperationTimeout()) - defer cancel() - - var impulse v1alpha1.Impulse - if err := c.Get(apiCtx, key, &impulse); err != nil { - return wrapK8sError(err, "failed to get impulse '%s' in namespace '%s'", impulseName, targetNamespace) - } - - before := impulse.DeepCopy() - status := &impulse.Status - status.ObservedGeneration = impulse.Generation - status.TriggersReceived += delta.TriggersReceived - status.StoriesLaunched += delta.StoriesLaunched - status.FailedTriggers += delta.FailedTriggers - status.ThrottledTriggers += delta.ThrottledTriggers - - status.LastTrigger = advanceTimestamp(status.LastTrigger, &delta.LastTrigger) - status.LastSuccess = advanceTimestamp(status.LastSuccess, delta.LastSuccess) - status.LastThrottled = advanceTimestamp(status.LastThrottled, delta.LastThrottled) - - return c.Status().Patch(apiCtx, &impulse, client.MergeFrom(before)) - }) -} - -// advanceTimestamp returns the newer of current and candidate. If candidate is -// nil or zero it returns current unchanged. -func advanceTimestamp(current *metav1.Time, candidate *time.Time) *metav1.Time { - if candidate == nil || candidate.IsZero() { - return current - } - mt := metav1.NewTime(candidate.UTC()) - if current == nil || current.Before(&metav1.Time{Time: candidate.UTC()}) { - return &mt - } - return current -} diff --git a/k8s/naming.go b/k8s/naming.go deleted file mode 100644 index ee3d77f..0000000 --- a/k8s/naming.go +++ /dev/null @@ -1,80 +0,0 @@ -package k8s - -import ( - "crypto/sha1" - "encoding/hex" - "fmt" - "strings" -) - -const dns1123MaxLength = 63 - -// ComposeDNS1123Name builds a DNS-1123 compliant name from provided parts. -// It preserves readability when possible and appends a hash suffix when needed. -func ComposeDNS1123Name(parts ...string) string { - baseParts := make([]string, 0, len(parts)) - for _, part := range parts { - normalized := normalizeDNS1123Part(part) - if normalized != "" { - baseParts = append(baseParts, normalized) - } - } - base := strings.Join(baseParts, "-") - if base == "" { - base = "resource" - } - if len(base) <= dns1123MaxLength { - return base - } - - sum := sha1.Sum([]byte(base)) - suffix := hex.EncodeToString(sum[:])[:8] - prefixLen := dns1123MaxLength - 1 - len(suffix) - if prefixLen < 1 { - if len(suffix) > dns1123MaxLength { - return suffix[:dns1123MaxLength] - } - return suffix - } - - prefix := strings.TrimSuffix(base[:prefixLen], "-") - if prefix == "" { - prefix = "resource" - } - name := fmt.Sprintf("%s-%s", prefix, suffix) - if len(name) > dns1123MaxLength { - name = strings.TrimSuffix(name[:dns1123MaxLength], "-") - if name == "" { - return suffix - } - } - return name -} - -func normalizeDNS1123Part(part string) string { - part = strings.ToLower(strings.TrimSpace(part)) - if part == "" { - return "" - } - - var builder strings.Builder - lastDash := false - for _, r := range part { - switch { - case r >= 'a' && r <= 'z': - builder.WriteRune(r) - lastDash = false - case r >= '0' && r <= '9': - builder.WriteRune(r) - lastDash = false - default: - if builder.Len() == 0 || lastDash { - continue - } - builder.WriteByte('-') - lastDash = true - } - } - - return strings.Trim(builder.String(), "-") -} diff --git a/k8s/naming_test.go b/k8s/naming_test.go deleted file mode 100644 index 691bfc1..0000000 --- a/k8s/naming_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package k8s - -import ( - "regexp" - "strings" - "testing" -) - -var dns1123LabelPattern = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`) - -func TestComposeDNS1123NameNormalizesInvalidCharacters(t *testing.T) { - got := ComposeDNS1123Name("My_App", "UPPER", "--bad--") - want := "my-app-upper-bad" - if got != want { - t.Fatalf("ComposeDNS1123Name() = %q, want %q", got, want) - } -} - -func TestComposeDNS1123NameFallsBackWhenPartsNormalizeEmpty(t *testing.T) { - if got := ComposeDNS1123Name("!!!", "---"); got != "resource" { //nolint:goconst - t.Fatalf("ComposeDNS1123Name() = %q, want %q", got, "resource") - } -} - -func TestComposeDNS1123NameLongNameRemainsCompliant(t *testing.T) { - got := ComposeDNS1123Name(strings.Repeat("A", 80), "Part_With_Invalid_Chars") - if len(got) > dns1123MaxLength { - t.Fatalf("name length = %d, want <= %d", len(got), dns1123MaxLength) - } - if !dns1123LabelPattern.MatchString(got) { - t.Fatalf("generated name %q is not DNS-1123 compliant", got) - } -} diff --git a/k8s/rest_config_test.go b/k8s/rest_config_test.go deleted file mode 100644 index 14fafc6..0000000 --- a/k8s/rest_config_test.go +++ /dev/null @@ -1,71 +0,0 @@ -package k8s - -import ( - "testing" - "time" - - "github.com/bubustack/core/contracts" - "k8s.io/client-go/rest" -) - -func TestApplyDefaultRestConfigSettings_AppliesBaselineDefaults(t *testing.T) { - cfg := &rest.Config{} - - applyDefaultRestConfigSettings(cfg) - - if cfg.QPS != 20 { - t.Fatalf("QPS = %v, want 20", cfg.QPS) - } - if cfg.Burst != 40 { - t.Fatalf("Burst = %d, want 40", cfg.Burst) - } - if cfg.Timeout != 30*time.Second { - t.Fatalf("Timeout = %s, want 30s", cfg.Timeout) - } -} - -func TestApplyEnvOverridesToRestConfig_OverridesTimeoutAndUserAgent(t *testing.T) { - t.Setenv(contracts.K8sTimeoutEnv, "45s") - t.Setenv(contracts.K8sUserAgentEnv, "custom-agent") - - cfg := &rest.Config{ - QPS: 33, - Burst: 66, - Timeout: 5 * time.Second, - UserAgent: "old-agent", - } - - applyEnvOverridesToRestConfig(cfg) - - if cfg.Timeout != 45*time.Second { - t.Fatalf("Timeout = %s, want 45s", cfg.Timeout) - } - if cfg.UserAgent != "custom-agent" { - t.Fatalf("UserAgent = %q, want %q", cfg.UserAgent, "custom-agent") - } - if cfg.QPS != 33 { - t.Fatalf("QPS = %v, want 33", cfg.QPS) - } - if cfg.Burst != 66 { - t.Fatalf("Burst = %d, want 66", cfg.Burst) - } -} - -func TestApplyEnvOverridesToRestConfig_IgnoresInvalidTimeoutAndKeepsDefaultUserAgent(t *testing.T) { - t.Setenv(contracts.K8sTimeoutEnv, "not-a-duration") - t.Setenv(contracts.K8sUserAgentEnv, "") - - cfg := &rest.Config{ - Timeout: 12 * time.Second, - UserAgent: "original-agent", - } - - applyEnvOverridesToRestConfig(cfg) - - if cfg.Timeout != 12*time.Second { - t.Fatalf("Timeout = %s, want 12s", cfg.Timeout) - } - if cfg.UserAgent != "bubu-sdk-go" { //nolint:goconst - t.Fatalf("UserAgent = %q, want %q", cfg.UserAgent, "bubu-sdk-go") - } -} diff --git a/k8s/trigger_delivery.go b/k8s/trigger_delivery.go deleted file mode 100644 index af6ea09..0000000 --- a/k8s/trigger_delivery.go +++ /dev/null @@ -1,302 +0,0 @@ -package k8s - -import ( - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "errors" - "fmt" - "os" - "strconv" - "strings" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" - "github.com/bubustack/core/contracts" - "github.com/bubustack/core/templating" -) - -const ( - triggerDedupeModeEnv = "BUBU_TRIGGER_DEDUPE_MODE" - triggerDedupeKeyTemplateEnv = "BUBU_TRIGGER_DEDUPE_KEY_TEMPLATE" - triggerRetryMaxAttemptsEnv = "BUBU_TRIGGER_RETRY_MAX_ATTEMPTS" - triggerRetryBaseDelayEnv = "BUBU_TRIGGER_RETRY_BASE_DELAY" - triggerRetryMaxDelayEnv = "BUBU_TRIGGER_RETRY_MAX_DELAY" - triggerRetryBackoffEnv = "BUBU_TRIGGER_RETRY_BACKOFF" - triggerThrottleRateEnv = "BUBU_TRIGGER_THROTTLE_RATE_PER_SECOND" - triggerThrottleBurstEnv = "BUBU_TRIGGER_THROTTLE_BURST" - triggerThrottleMaxInFlight = "BUBU_TRIGGER_THROTTLE_MAX_IN_FLIGHT" -) - -type triggerDeliveryPolicy struct { - dedupe *triggerDedupePolicy - retry *triggerRetryPolicy -} - -type triggerDedupePolicy struct { - mode string - keyTemplate string -} - -type triggerRetryPolicy struct { - maxAttempts int - baseDelay time.Duration - maxDelay time.Duration - backoff string -} - -func loadTriggerDeliveryPolicyFromEnv() (*triggerDeliveryPolicy, error) { //nolint:gocyclo - mode := strings.ToLower(strings.TrimSpace(os.Getenv(triggerDedupeModeEnv))) - keyTemplate := strings.TrimSpace(os.Getenv(triggerDedupeKeyTemplateEnv)) - maxAttemptsRaw := strings.TrimSpace(os.Getenv(triggerRetryMaxAttemptsEnv)) - baseDelayRaw := strings.TrimSpace(os.Getenv(triggerRetryBaseDelayEnv)) - maxDelayRaw := strings.TrimSpace(os.Getenv(triggerRetryMaxDelayEnv)) - backoff := strings.ToLower(strings.TrimSpace(os.Getenv(triggerRetryBackoffEnv))) - - if mode == "" && keyTemplate == "" && maxAttemptsRaw == "" && baseDelayRaw == "" && maxDelayRaw == "" && backoff == "" { //nolint:lll - return nil, nil - } - - policy := &triggerDeliveryPolicy{} - if mode != "" || keyTemplate != "" { - if mode == "" && keyTemplate != "" { - return nil, fmt.Errorf("%s requires %s=key", triggerDedupeKeyTemplateEnv, triggerDedupeModeEnv) - } - switch mode { - case "none", "token", "key": //nolint:goconst - default: - return nil, fmt.Errorf("%s must be one of none, token, key", triggerDedupeModeEnv) - } - policy.dedupe = &triggerDedupePolicy{ - mode: mode, - keyTemplate: keyTemplate, - } - } - - if maxAttemptsRaw != "" || baseDelayRaw != "" || maxDelayRaw != "" || backoff != "" { - retry := &triggerRetryPolicy{backoff: backoff} - if maxAttemptsRaw != "" { - val, err := strconv.Atoi(maxAttemptsRaw) - if err != nil || val < 0 { - return nil, fmt.Errorf("%s must be a non-negative integer", triggerRetryMaxAttemptsEnv) - } - retry.maxAttempts = val - } - if baseDelayRaw != "" { - parsed, err := parsePositiveDuration(baseDelayRaw) - if err != nil { - return nil, fmt.Errorf("%s invalid: %w", triggerRetryBaseDelayEnv, err) - } - retry.baseDelay = parsed - } - if maxDelayRaw != "" { - parsed, err := parsePositiveDuration(maxDelayRaw) - if err != nil { - return nil, fmt.Errorf("%s invalid: %w", triggerRetryMaxDelayEnv, err) - } - retry.maxDelay = parsed - } - if backoff != "" { - switch backoff { - case "exponential", "linear", "constant": - default: - return nil, fmt.Errorf("%s must be one of exponential, linear, constant", triggerRetryBackoffEnv) - } - } - policy.retry = retry - } - return policy, nil -} - -func resolveTriggerTokenForPolicy( - ctx context.Context, - clientNamespace string, - storyName string, - storyNamespace string, - inputs map[string]any, - policy *triggerDeliveryPolicy, - existingToken string, -) (string, error) { - if policy == nil || policy.dedupe == nil { - return existingToken, nil - } - - switch policy.dedupe.mode { - case "none", "": - return existingToken, nil - case "token": - if strings.TrimSpace(existingToken) == "" { - return "", fmt.Errorf("trigger token required by %s=token", triggerDedupeModeEnv) - } - return existingToken, nil - case "key": - if strings.TrimSpace(policy.dedupe.keyTemplate) == "" { - return "", fmt.Errorf("trigger key template required by %s=key", triggerDedupeModeEnv) - } - computed, err := computeTriggerKey(ctx, clientNamespace, storyName, storyNamespace, inputs, policy.dedupe.keyTemplate) - if err != nil { - return "", err - } - if existingToken != "" && existingToken != computed { - return "", fmt.Errorf("trigger token mismatch: provided token does not match %s output", triggerDedupeKeyTemplateEnv) - } - return computed, nil - default: - return "", fmt.Errorf("unsupported dedupe mode %q", policy.dedupe.mode) - } -} - -func computeTriggerKey( - ctx context.Context, - clientNamespace string, - storyName string, - storyNamespace string, - inputs map[string]any, - templateText string, -) (string, error) { - eval, err := templating.New(templating.Config{Deterministic: true, EvaluationTimeout: 250 * time.Millisecond}) - if err != nil { - return "", fmt.Errorf("init trigger template evaluator: %w", err) - } - defer eval.Close() - - impulseName := strings.TrimSpace(os.Getenv(contracts.ImpulseNameEnv)) - impulseNamespace := strings.TrimSpace(os.Getenv(contracts.ImpulseNamespaceEnv)) - if impulseNamespace == "" { - impulseNamespace = clientNamespace - } - targetNamespace := strings.TrimSpace(storyNamespace) - if targetNamespace == "" { - targetNamespace = clientNamespace - } - - vars := map[string]any{ - "inputs": inputs, - "story": map[string]any{ - "name": storyName, - "namespace": targetNamespace, - }, - "impulse": map[string]any{ - "name": impulseName, - "namespace": impulseNamespace, - }, - } - - resolved, err := eval.ResolveTemplateString(ctx, templateText, vars) - if err != nil { - return "", fmt.Errorf("resolve trigger key template: %w", err) - } - key := "" - switch v := resolved.(type) { - case string: - key = v - default: - encoded, err := json.Marshal(v) - if err != nil { - return "", fmt.Errorf("marshal trigger key: %w", err) - } - key = string(encoded) - } - key = strings.TrimSpace(key) - if key == "" { - return "", fmt.Errorf("trigger key resolved to empty value") - } - sum := sha256.Sum256([]byte(key)) - return hex.EncodeToString(sum[:]), nil -} - -func retryTriggerStory( - ctx context.Context, - policy *triggerRetryPolicy, - run func(context.Context) (*runsv1alpha1.StoryRun, error), -) (*runsv1alpha1.StoryRun, error) { - attempts := 1 - if policy != nil && policy.maxAttempts > 0 { - attempts = policy.maxAttempts - } - if attempts <= 1 { - return run(ctx) - } - - var lastErr error - for attempt := 1; attempt <= attempts; attempt++ { - if attempt > 1 { - delay := computeRetryDelay(policy, attempt-1) - if delay > 0 { - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(delay): - } - } - } - result, err := run(ctx) - if err == nil { - return result, nil - } - lastErr = err - if !isRetryableTriggerError(err) || attempt == attempts { - break - } - } - return nil, lastErr -} - -func isRetryableTriggerError(err error) bool { - if err == nil { - return false - } - if errors.Is(err, sdkerrors.ErrRetryable) { - return true - } - return errors.Is(classifyK8sError(err), sdkerrors.ErrRetryable) -} - -func computeRetryDelay(policy *triggerRetryPolicy, attempt int) time.Duration { - if policy == nil { - return 0 - } - base := policy.baseDelay - if base <= 0 { - base = time.Second - } - maxDelay := policy.maxDelay - if maxDelay <= 0 { - maxDelay = 5 * time.Minute // sensible cap to prevent overflow - } - delay := base //nolint:ineffassign - switch policy.backoff { - case "linear": - delay = base * time.Duration(attempt) - case "constant": - delay = base - default: - delay = base - if attempt > 1 { - // Cap the shift to prevent integer overflow (shift >= 63 wraps to negative). - shift := min(attempt-1, 62) - delay = base * time.Duration(1< maxDelay { - delay = maxDelay - } - return jitterDuration(delay) -} - -func parsePositiveDuration(raw string) (time.Duration, error) { - raw = strings.TrimSpace(raw) - if raw == "" { - return 0, fmt.Errorf("duration must be set") - } - parsed, err := time.ParseDuration(raw) - if err != nil { - return 0, fmt.Errorf("parse duration: %w", err) - } - if parsed <= 0 { - return 0, fmt.Errorf("duration must be positive") - } - return parsed, nil -} diff --git a/k8s/trigger_delivery_test.go b/k8s/trigger_delivery_test.go deleted file mode 100644 index 79f5db6..0000000 --- a/k8s/trigger_delivery_test.go +++ /dev/null @@ -1,99 +0,0 @@ -package k8s - -import ( - "context" - "errors" - "net/url" - "syscall" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" -) - -func TestClassifyK8sErrorRetryableTransportErrors(t *testing.T) { - tests := []struct { - name string - err error - }{ - { - name: "connection reset", - err: &url.Error{ - Op: "Post", - URL: "https://api.example.test/storyruns", - Err: syscall.ECONNRESET, - }, - }, - { - name: "connection refused", - err: &url.Error{ - Op: "Post", - URL: "https://api.example.test/storyruns", - Err: syscall.ECONNREFUSED, - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if err := classifyK8sError(tt.err); !errors.Is(err, sdkerrors.ErrRetryable) { - t.Fatalf("expected ErrRetryable, got %v", err) - } - }) - } -} - -func TestRetryTriggerStoryRetriesTransientTransportError(t *testing.T) { - policy := &triggerRetryPolicy{ - maxAttempts: 3, - baseDelay: time.Millisecond, - maxDelay: time.Millisecond, - backoff: "constant", - } - - attempts := 0 - expected := &runsv1alpha1.StoryRun{} - got, err := retryTriggerStory(context.Background(), policy, func(context.Context) (*runsv1alpha1.StoryRun, error) { - attempts++ - if attempts < 3 { - return nil, &url.Error{ - Op: "Post", - URL: "https://api.example.test/storyruns", - Err: syscall.ECONNRESET, - } - } - return expected, nil - }) - if err != nil { - t.Fatalf("retryTriggerStory() error = %v", err) - } - if got != expected { - t.Fatalf("retryTriggerStory() returned unexpected StoryRun pointer: got %+v want %+v", got, expected) - } - if attempts != 3 { - t.Fatalf("expected 3 attempts, got %d", attempts) - } -} - -func TestRetryTriggerStoryDoesNotRetryPermanentError(t *testing.T) { - policy := &triggerRetryPolicy{ - maxAttempts: 3, - baseDelay: time.Millisecond, - maxDelay: time.Millisecond, - backoff: "constant", - } - - attempts := 0 - permanentErr := errors.New("invalid trigger payload") - _, err := retryTriggerStory(context.Background(), policy, func(context.Context) (*runsv1alpha1.StoryRun, error) { - attempts++ - return nil, permanentErr - }) - if !errors.Is(err, permanentErr) { - t.Fatalf("expected permanent error to be returned, got %v", err) - } - if attempts != 1 { - t.Fatalf("expected permanent error to stop retries after 1 attempt, got %d", attempts) - } -} diff --git a/k8s/trigger_throttle.go b/k8s/trigger_throttle.go deleted file mode 100644 index 955fda8..0000000 --- a/k8s/trigger_throttle.go +++ /dev/null @@ -1,143 +0,0 @@ -package k8s - -import ( - "context" - "fmt" - "os" - "strconv" - "strings" - "sync" - "time" - - "golang.org/x/time/rate" -) - -type triggerThrottlePolicy struct { - ratePerSecond int - burst int - maxInFlight int -} - -type triggerThrottle struct { - limiter *rate.Limiter - sem chan struct{} -} - -var ( - throttleMu sync.Mutex - throttleKey string - throttleImpl *triggerThrottle -) - -func loadTriggerThrottlePolicyFromEnv() (*triggerThrottlePolicy, error) { - rateRaw := strings.TrimSpace(os.Getenv(triggerThrottleRateEnv)) - burstRaw := strings.TrimSpace(os.Getenv(triggerThrottleBurstEnv)) - maxRaw := strings.TrimSpace(os.Getenv(triggerThrottleMaxInFlight)) - if rateRaw == "" && burstRaw == "" && maxRaw == "" { - return nil, nil - } - policy := &triggerThrottlePolicy{} - if rateRaw != "" { - val, err := strconv.Atoi(rateRaw) - if err != nil || val < 0 { - return nil, fmt.Errorf("%s must be a non-negative integer", triggerThrottleRateEnv) - } - policy.ratePerSecond = val - } - if burstRaw != "" { - val, err := strconv.Atoi(burstRaw) - if err != nil || val < 0 { - return nil, fmt.Errorf("%s must be a non-negative integer", triggerThrottleBurstEnv) - } - policy.burst = val - } - if maxRaw != "" { - val, err := strconv.Atoi(maxRaw) - if err != nil || val < 0 { - return nil, fmt.Errorf("%s must be a non-negative integer", triggerThrottleMaxInFlight) - } - policy.maxInFlight = val - } - return policy, nil -} - -func getTriggerThrottle() (*triggerThrottle, error) { - policy, err := loadTriggerThrottlePolicyFromEnv() - if err != nil || policy == nil { - return nil, err - } - key := fmt.Sprintf("rate=%d;burst=%d;max=%d", policy.ratePerSecond, policy.burst, policy.maxInFlight) - throttleMu.Lock() - defer throttleMu.Unlock() - if throttleImpl != nil && throttleKey == key { - return throttleImpl, nil - } - throttleKey = key - throttleImpl = newTriggerThrottle(policy) - return throttleImpl, nil -} - -func newTriggerThrottle(policy *triggerThrottlePolicy) *triggerThrottle { - if policy == nil { - return nil - } - var limiter *rate.Limiter - if policy.ratePerSecond > 0 { - burst := policy.burst - if burst <= 0 { - burst = policy.ratePerSecond - } - limiter = rate.NewLimiter(rate.Limit(policy.ratePerSecond), burst) - } - var sem chan struct{} - if policy.maxInFlight > 0 { - sem = make(chan struct{}, policy.maxInFlight) - } - if limiter == nil && sem == nil { - return nil - } - return &triggerThrottle{limiter: limiter, sem: sem} -} - -func (t *triggerThrottle) Acquire(ctx context.Context) (bool, func(), error) { - if t == nil { - return false, func() {}, nil - } - waited := false - release := func() {} - if t.sem != nil { - select { - case t.sem <- struct{}{}: - release = func() { <-t.sem } - default: - waited = true - select { - case t.sem <- struct{}{}: - release = func() { <-t.sem } - case <-ctx.Done(): - return waited, nil, ctx.Err() - } - } - } - if t.limiter != nil { - res := t.limiter.Reserve() - if !res.OK() { - release() - return waited, nil, fmt.Errorf("trigger throttle reservation failed") - } - delay := res.Delay() - if delay > 0 { - waited = true - timer := time.NewTimer(delay) - defer timer.Stop() - select { - case <-timer.C: - case <-ctx.Done(): - res.Cancel() - release() - return waited, nil, ctx.Err() - } - } - } - return waited, release, nil -} diff --git a/k8s/trigger_throttle_test.go b/k8s/trigger_throttle_test.go deleted file mode 100644 index 5853165..0000000 --- a/k8s/trigger_throttle_test.go +++ /dev/null @@ -1,160 +0,0 @@ -package k8s - -import ( - "context" - "errors" - "testing" - "time" -) - -func TestLoadTriggerThrottlePolicyFromEnvEmpty(t *testing.T) { - t.Setenv(triggerThrottleRateEnv, "") - t.Setenv(triggerThrottleBurstEnv, "") - t.Setenv(triggerThrottleMaxInFlight, "") - policy, err := loadTriggerThrottlePolicyFromEnv() - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - if policy != nil { - t.Fatalf("expected nil policy, got %#v", policy) - } -} - -func TestLoadTriggerThrottlePolicyFromEnvInvalid(t *testing.T) { - t.Setenv(triggerThrottleRateEnv, "-1") - if _, err := loadTriggerThrottlePolicyFromEnv(); err == nil { - t.Fatal("expected error for negative ratePerSecond") - } -} - -func TestNewTriggerThrottleDefaultsBurst(t *testing.T) { - policy := &triggerThrottlePolicy{ratePerSecond: 5} - throttle := newTriggerThrottle(policy) - if throttle == nil || throttle.limiter == nil { - t.Fatal("expected limiter to be created") - } - if got := throttle.limiter.Burst(); got != 5 { - t.Fatalf("expected burst to default to rate (5), got %d", got) - } -} - -func TestTriggerThrottleAcquireWaitsForSemaphoreRelease(t *testing.T) { - throttle := newTriggerThrottle(&triggerThrottlePolicy{maxInFlight: 1}) - if throttle == nil { - t.Fatal("expected throttle to be created") - } - - waited, release, err := throttle.Acquire(context.Background()) - if err != nil { - t.Fatalf("first Acquire() failed: %v", err) - } - if waited { - t.Fatal("first Acquire() should not report waiting") - } - if release == nil { - t.Fatal("first Acquire() should return release func") - } - - type acquireResult struct { - waited bool - release func() - err error - } - resultCh := make(chan acquireResult, 1) - go func() { - waited, release, err := throttle.Acquire(context.Background()) - resultCh <- acquireResult{waited: waited, release: release, err: err} - }() - - select { - case result := <-resultCh: - if result.release != nil { - result.release() - } - t.Fatal("second Acquire() should block until the first release") - case <-time.After(20 * time.Millisecond): - } - - release() - - select { - case result := <-resultCh: - if result.err != nil { - t.Fatalf("second Acquire() failed: %v", result.err) - } - if !result.waited { - t.Fatal("second Acquire() should report waiting under semaphore contention") - } - if result.release == nil { - t.Fatal("second Acquire() should return release func") - } - result.release() - case <-time.After(200 * time.Millisecond): - t.Fatal("second Acquire() did not resume after release") - } - - if got := len(throttle.sem); got != 0 { - t.Fatalf("expected semaphore to be fully released, got %d in-flight", got) - } -} - -func TestTriggerThrottleAcquireCancelsWhileWaitingOnSemaphore(t *testing.T) { - throttle := newTriggerThrottle(&triggerThrottlePolicy{maxInFlight: 1}) - if throttle == nil { - t.Fatal("expected throttle to be created") - } - - _, release, err := throttle.Acquire(context.Background()) - if err != nil { - t.Fatalf("first Acquire() failed: %v", err) - } - defer release() - - ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) - defer cancel() - - waited, release2, err := throttle.Acquire(ctx) - if !waited { - t.Fatal("Acquire() should report waiting when blocked on semaphore") - } - if release2 != nil { - t.Fatal("Acquire() should not return release func on cancellation") - } - if !errors.Is(err, context.DeadlineExceeded) { - t.Fatalf("expected context deadline error, got %v", err) - } -} - -func TestTriggerThrottleAcquireCancelsDuringLimiterWaitAndReleasesSemaphore(t *testing.T) { - throttle := newTriggerThrottle(&triggerThrottlePolicy{ - ratePerSecond: 1, - burst: 1, - maxInFlight: 1, - }) - if throttle == nil { - t.Fatal("expected throttle to be created") - } - - _, release, err := throttle.Acquire(context.Background()) - if err != nil { - t.Fatalf("initial Acquire() failed: %v", err) - } - release() - - ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) - defer cancel() - - waited, release2, err := throttle.Acquire(ctx) - if !waited { - t.Fatal("Acquire() should report waiting when limiter delay is non-zero") - } - if release2 != nil { - t.Fatal("Acquire() should not return release func on limiter cancellation") - } - if !errors.Is(err, context.DeadlineExceeded) { - t.Fatalf("expected context deadline error, got %v", err) - } - if got := len(throttle.sem); got != 0 { - t.Fatalf("expected limiter cancellation to release semaphore, got %d in-flight", got) - } -} diff --git a/logs.go b/logs.go deleted file mode 100644 index 2a626ab..0000000 --- a/logs.go +++ /dev/null @@ -1,409 +0,0 @@ -package sdk - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "io" - "log/slog" - "os" - "path" - "path/filepath" - "regexp" - "strings" - "sync" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/storage" - "github.com/bubustack/bubu-sdk-go/k8s" - sdkenv "github.com/bubustack/bubu-sdk-go/pkg/env" - "github.com/bubustack/core/contracts" - k8sruntime "k8s.io/apimachinery/pkg/runtime" -) - -// ErrLogsUnavailable indicates that logs cannot be published (for example, when -// not running inside a StepRun or when storage is disabled). Callers may treat -// this as a soft failure. -var ErrLogsUnavailable = errors.New("log publishing unavailable: not running inside a StepRun or storage disabled") - -const ( - defaultLogContentType = "text/plain; charset=utf-8" - logPatchTimeout = 5 * time.Second - defaultLogCaptureSize = 1024 * 1024 // 1 MiB - defaultLogFileMaxSize = 5 * 1024 * 1024 - logAllowedRootsEnv = "BUBU_LOG_ALLOWED_ROOTS" - logFileMaxBytesEnv = "BUBU_LOG_FILE_MAX_BYTES" -) - -var ( - logPublisherMu sync.Mutex - logPublisherInst *logPublisher - logPublisherErr error // only set for permanent errors (missing env) - logClientFactory = func() (logPatcher, error) { return k8s.SharedClient() } - logSegmentSanitize = regexp.MustCompile(`[^a-zA-Z0-9._-]+`) -) - -type logPatcher interface { - PatchStepRunStatus(ctx context.Context, stepRunName string, status runsv1alpha1.StepRunStatus) error -} - -type logPublisher struct { - client logPatcher - stepRunID string - namespace string - storyRun string - storyName string - stepName string - engram string -} - -type logCapture struct { - buf *cappedBuffer -} - -func (c *logCapture) Drain() []byte { - if c == nil || c.buf == nil { - return nil - } - return c.buf.Drain() -} - -type cappedBuffer struct { - mu sync.Mutex - max int - data []byte -} - -func newCappedBuffer(max int) *cappedBuffer { - if max <= 0 { - max = defaultLogCaptureSize - } - return &cappedBuffer{max: max} -} - -func (b *cappedBuffer) Write(p []byte) (int, error) { - if b == nil || len(p) == 0 { - return len(p), nil - } - b.mu.Lock() - defer b.mu.Unlock() - - if b.max <= 0 { - return len(p), nil - } - if len(p) >= b.max { - b.data = append(b.data[:0], p[len(p)-b.max:]...) - return len(p), nil - } - if len(b.data)+len(p) > b.max { - overflow := len(b.data) + len(p) - b.max - b.data = append(b.data[overflow:], p...) - return len(p), nil - } - b.data = append(b.data, p...) - return len(p), nil -} - -func (b *cappedBuffer) Drain() []byte { - if b == nil { - return nil - } - b.mu.Lock() - defer b.mu.Unlock() - if len(b.data) == 0 { - return nil - } - out := make([]byte, len(b.data)) - copy(out, b.data) - b.data = b.data[:0] - return out -} - -func newDefaultLoggerWithCapture() (*slog.Logger, *logCapture) { - opts := &slog.HandlerOptions{} - if isDebugEnabled() { - opts.Level = slog.LevelDebug - } - buf := newCappedBuffer(defaultLogCaptureSize) - writer := io.MultiWriter(os.Stdout, buf) - logger := slog.New(slog.NewJSONHandler(writer, opts)) - return logger, &logCapture{buf: buf} -} - -func publishCapturedLogs(ctx context.Context) { - capture := logCaptureFromContext(ctx) - if capture == nil { - return - } - payload := capture.Drain() - if len(payload) == 0 { - return - } - if err := PublishLogsWithContentType(ctx, payload, "application/x-ndjson"); err != nil && !errors.Is(err, ErrLogsUnavailable) { //nolint:lll - LoggerFromContext(ctx).Warn("Failed to publish captured logs", "error", err) - } -} - -// PublishLogs uploads log bytes to storage (when enabled) and patches StepRun.status.logs -// with a storage reference. If storage is disabled, it returns ErrLogsUnavailable. -func PublishLogs(ctx context.Context, payload []byte) error { - return PublishLogsWithContentType(ctx, payload, "") -} - -// PublishLogsWithContentType is like PublishLogs but lets callers set a content type. -func PublishLogsWithContentType(ctx context.Context, payload []byte, contentType string) error { - if len(payload) == 0 { - return nil - } - publisher, err := getLogPublisher() - if err != nil { - return err - } - sm, err := storage.SharedManager(ctx) - if err != nil { - return fmt.Errorf("failed to create storage manager: %w", err) - } - if sm == nil || sm.GetStore() == nil { - return ErrLogsUnavailable - } - objectPath := buildLogStoragePath(publisher, time.Now().UTC()) - if objectPath == "" { - return fmt.Errorf("failed to build log storage path") - } - ctype := strings.TrimSpace(contentType) - if ctype == "" { - ctype = defaultLogContentType - } - if err := sm.WriteBlob(ctx, objectPath, ctype, payload); err != nil { - return err - } - - ref := map[string]any{ - storage.StorageRefKey: objectPath, - storage.StorageContentTypeKey: ctype, - } - if schema, version := logSchemaMetadata(publisher); schema != "" { - ref[storage.StorageSchemaKey] = schema - if version != "" { - ref[storage.StorageSchemaVersionKey] = version - } - } - - raw, err := json.Marshal(ref) - if err != nil { - return fmt.Errorf("failed to marshal log reference: %w", err) - } - patch := runsv1alpha1.StepRunStatus{ - Logs: &k8sruntime.RawExtension{Raw: raw}, - } - ctx, cancel := context.WithTimeout(ctx, logPatchTimeout) - defer cancel() - return publisher.client.PatchStepRunStatus(ctx, publisher.stepRunID, patch) -} - -// PublishLogFile reads the provided file and publishes its contents as logs. -func PublishLogFile(ctx context.Context, path string, contentType string) error { //nolint:revive - data, err := readAllowedLogFile(path) - if err != nil { - return err - } - return PublishLogsWithContentType(ctx, data, contentType) -} - -func readAllowedLogFile(path string) ([]byte, error) { //nolint:revive - resolvedPath, err := resolveAllowedLogFilePath(path) - if err != nil { - return nil, err - } - info, err := os.Stat(resolvedPath) - if err != nil { - return nil, fmt.Errorf("failed to stat log file: %w", err) - } - if !info.Mode().IsRegular() { - return nil, fmt.Errorf("log file path must reference a regular file") - } - maxBytes := resolveLogFileMaxBytes() - if maxBytes > 0 && info.Size() > int64(maxBytes) { - return nil, fmt.Errorf("log file exceeds max bytes (%d > %d)", info.Size(), maxBytes) - } - data, err := os.ReadFile(resolvedPath) - if err != nil { - return nil, fmt.Errorf("failed to read log file: %w", err) - } - return data, nil -} - -func resolveAllowedLogFilePath(path string) (string, error) { //nolint:revive - cleanPath := strings.TrimSpace(path) - if cleanPath == "" { - return "", fmt.Errorf("log file path is required") - } - absPath, err := filepath.Abs(cleanPath) - if err != nil { - return "", fmt.Errorf("failed to resolve log file path: %w", err) - } - resolvedPath, err := filepath.EvalSymlinks(absPath) - if err != nil { - return "", fmt.Errorf("failed to resolve log file path: %w", err) - } - for _, root := range resolveAllowedLogRoots() { - if pathWithinRoot(resolvedPath, root) { - return resolvedPath, nil - } - } - return "", fmt.Errorf("log file path %q is outside allowed roots", cleanPath) -} - -func resolveAllowedLogRoots() []string { - rawRoots := strings.TrimSpace(os.Getenv(logAllowedRootsEnv)) - var candidates []string - if rawRoots == "" { - candidates = defaultLogAllowedRoots() - } else { - candidates = strings.Split(rawRoots, string(os.PathListSeparator)) - } - - roots := make([]string, 0, len(candidates)) - seen := make(map[string]struct{}, len(candidates)) - for _, candidate := range candidates { - root := canonicalizeLogRoot(candidate) - if root == "" { - continue - } - if _, ok := seen[root]; ok { - continue - } - seen[root] = struct{}{} - roots = append(roots, root) - } - return roots -} - -func defaultLogAllowedRoots() []string { - roots := []string{"/tmp", "/var/tmp", "/var/log", os.TempDir()} - if wd, err := os.Getwd(); err == nil && strings.TrimSpace(wd) != "" { - roots = append(roots, wd) - } - return roots -} - -func canonicalizeLogRoot(path string) string { //nolint:revive - cleanPath := strings.TrimSpace(path) - if cleanPath == "" { - return "" - } - absPath, err := filepath.Abs(cleanPath) - if err != nil { - return "" - } - resolvedPath, err := filepath.EvalSymlinks(absPath) - if err == nil { - return filepath.Clean(resolvedPath) - } - return filepath.Clean(absPath) -} - -func pathWithinRoot(path string, root string) bool { //nolint:revive - if strings.TrimSpace(path) == "" || strings.TrimSpace(root) == "" { - return false - } - rel, err := filepath.Rel(root, path) - if err != nil { - return false - } - return rel == "." || (rel != ".." && !strings.HasPrefix(rel, ".."+string(os.PathSeparator))) -} - -func resolveLogFileMaxBytes() int { - return sdkenv.GetInt(logFileMaxBytesEnv, defaultLogFileMaxSize) -} - -func getLogPublisher() (*logPublisher, error) { - logPublisherMu.Lock() - defer logPublisherMu.Unlock() - if logPublisherInst != nil { - return logPublisherInst, nil - } - if logPublisherErr != nil { - // Permanent error (e.g. missing env) — don't retry. - return nil, logPublisherErr - } - stepRunID := strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) - if stepRunID == "" { - logPublisherErr = ErrLogsUnavailable - return nil, logPublisherErr - } - client, err := logClientFactory() - if err != nil { - // Transient error — don't cache, allow retry on next call. - return nil, fmt.Errorf("failed to initialize log client: %w", err) - } - logPublisherInst = &logPublisher{ - client: client, - stepRunID: stepRunID, - namespace: k8s.ResolvePodNamespace(), - storyRun: strings.TrimSpace(os.Getenv(contracts.StoryRunIDEnv)), - storyName: strings.TrimSpace(os.Getenv(contracts.StoryNameEnv)), - stepName: strings.TrimSpace(os.Getenv(contracts.StepNameEnv)), - engram: strings.TrimSpace(os.Getenv(contracts.EngramNameEnv)), - } - return logPublisherInst, nil -} - -func buildLogStoragePath(p *logPublisher, ts time.Time) string { - if p == nil || strings.TrimSpace(p.stepRunID) == "" { - return "" - } - segments := []string{"logs"} - if v := sanitizeLogSegment(p.storyRun); v != "" { - segments = append(segments, v) - } - if v := sanitizeLogSegment(p.stepName); v != "" { - segments = append(segments, v) - } - if v := sanitizeLogSegment(p.stepRunID); v != "" { - segments = append(segments, v) - } - filename := fmt.Sprintf("%d.log", ts.UnixNano()) - segments = append(segments, filename) - key := path.Join(segments...) - return storage.NamespacedKey(strings.TrimSpace(p.namespace), key) -} - -func sanitizeLogSegment(raw string) string { - trimmed := strings.TrimSpace(raw) - if trimmed == "" { - return "" - } - cleaned := logSegmentSanitize.ReplaceAllString(trimmed, "-") - cleaned = strings.Trim(cleaned, "-_.") - return cleaned -} - -func logSchemaMetadata(p *logPublisher) (string, string) { - if p == nil { - return "", "" - } - namespace := strings.TrimSpace(p.namespace) - storyName := strings.TrimSpace(p.storyName) - stepName := strings.TrimSpace(p.stepName) - engramName := strings.TrimSpace(p.engram) - - var schema string - switch { - case namespace != "" && engramName != "": - schema = fmt.Sprintf("bubu://engram/%s/%s/logs", namespace, engramName) - case namespace != "" && storyName != "" && stepName != "": - schema = fmt.Sprintf("bubu://story/%s/%s/steps/%s/logs", namespace, storyName, stepName) - case storyName != "" && stepName != "": - schema = fmt.Sprintf("bubu://story/%s/steps/%s/logs", storyName, stepName) - } - - version := strings.TrimSpace(os.Getenv(contracts.EngramVersionEnv)) - if version == "" { - version = strings.TrimSpace(os.Getenv(contracts.StoryVersionEnv)) - } - return schema, version -} diff --git a/logs_test.go b/logs_test.go deleted file mode 100644 index 1f8a519..0000000 --- a/logs_test.go +++ /dev/null @@ -1,64 +0,0 @@ -package sdk - -import ( - "os" - "path/filepath" - "strings" - "testing" -) - -func TestReadAllowedLogFileRejectsPathOutsideConfiguredRoots(t *testing.T) { - allowedDir := t.TempDir() - disallowedDir := t.TempDir() - path := filepath.Join(disallowedDir, "app.log") - if err := os.WriteFile(path, []byte("hello"), 0o600); err != nil { - t.Fatalf("write log file: %v", err) - } - - t.Setenv(logAllowedRootsEnv, allowedDir) - - _, err := readAllowedLogFile(path) - if err == nil { - t.Fatal("expected path outside configured roots to fail") - } - if !strings.Contains(err.Error(), "outside allowed roots") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestReadAllowedLogFileRejectsOversizedFiles(t *testing.T) { - root := t.TempDir() - path := filepath.Join(root, "large.log") - if err := os.WriteFile(path, []byte("abcdef"), 0o600); err != nil { - t.Fatalf("write log file: %v", err) - } - - t.Setenv(logAllowedRootsEnv, root) - t.Setenv(logFileMaxBytesEnv, "4") - - _, err := readAllowedLogFile(path) - if err == nil { - t.Fatal("expected oversized log file to fail") - } - if !strings.Contains(err.Error(), "exceeds max bytes") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestReadAllowedLogFileAllowsConfiguredRoot(t *testing.T) { - root := t.TempDir() - path := filepath.Join(root, "app.log") - if err := os.WriteFile(path, []byte("hello"), 0o600); err != nil { - t.Fatalf("write log file: %v", err) - } - - t.Setenv(logAllowedRootsEnv, root) - - data, err := readAllowedLogFile(path) - if err != nil { - t.Fatalf("readAllowedLogFile returned error: %v", err) - } - if string(data) != "hello" { - t.Fatalf("unexpected content: %q", string(data)) - } -} diff --git a/media/media.go b/media/media.go deleted file mode 100644 index d83c9d8..0000000 --- a/media/media.go +++ /dev/null @@ -1,155 +0,0 @@ -// Package media provides helpers for offloading large streaming payloads to -// shared object storage while keeping small payloads inline. -package media - -import ( - "context" - crypto_rand "crypto/rand" - "encoding/hex" - "fmt" - "os" - "path" - "regexp" - "strconv" - "strings" - "time" - - "github.com/bubustack/bobrapet/pkg/storage" - "github.com/bubustack/core/contracts" -) - -const defaultInlineLimit = 48 * 1024 // 48 KiB keeps short bursts inline for real-time UX - -// StorageReference describes an object persisted to the shared storage backend. -type StorageReference struct { - // Path is the object path inside the configured shared storage backend. - Path string `json:"path"` - // ContentType is the MIME type recorded for the stored object. - ContentType string `json:"contentType,omitempty"` - // SizeBytes is the stored blob size when known. - SizeBytes int `json:"sizeBytes,omitempty"` -} - -// WriteOptions control how object paths are composed when offloading payloads. -type WriteOptions struct { - // Namespace scopes the object path to the owning runtime namespace. - Namespace string - // StoryRun scopes the object path to the owning StoryRun. - StoryRun string - // Step scopes the object path to the owning step. - Step string - // Scope appends additional caller-defined path segments. - Scope []string - // ContentType records the MIME type written for the object. - ContentType string - // InlineLimit overrides the default inline/offload threshold. - InlineLimit int -} - -// MaybeOffloadBlob stores the provided data if it exceeds the inline limit. -// Returns a storage reference when offload happens; otherwise nil. -func MaybeOffloadBlob( - ctx context.Context, - sm *storage.StorageManager, - data []byte, - opts WriteOptions, -) (*StorageReference, error) { - if sm == nil || sm.GetStore() == nil || len(data) == 0 { - return nil, nil - } - limit := inlineLimit(opts.InlineLimit) - if len(data) <= limit { - return nil, nil - } - - contentType := firstNonEmpty(opts.ContentType, "application/octet-stream") - objectPath := buildPath(opts, time.Now().UTC()) - if err := sm.WriteBlob(ctx, objectPath, contentType, data); err != nil { - return nil, err - } - - return &StorageReference{ - Path: objectPath, - ContentType: contentType, - SizeBytes: len(data), - }, nil -} - -// ReadBlob loads raw bytes for the given reference. -func ReadBlob(ctx context.Context, sm *storage.StorageManager, ref *StorageReference) ([]byte, error) { - if ref == nil || strings.TrimSpace(ref.Path) == "" { - return nil, fmt.Errorf("storage reference missing path") - } - if sm == nil || sm.GetStore() == nil { - return nil, fmt.Errorf("storage is disabled") - } - return sm.ReadBlob(ctx, ref.Path) -} - -func inlineLimit(override int) int { - if override > 0 { - return override - } - if v := os.Getenv(contracts.MediaInlineSizeEnv); v != "" { - if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 { - return parsed - } - } - if v := os.Getenv(contracts.MaxInlineSizeEnv); v != "" { - if parsed, err := strconv.Atoi(v); err == nil && parsed > 0 { - return parsed - } - } - return defaultInlineLimit -} - -func buildPath(opts WriteOptions, ts time.Time) string { - segments := []string{"streams"} - for _, seg := range []string{opts.Namespace, opts.StoryRun, opts.Step} { - if sanitized := sanitizeSegment(seg); sanitized != "" { - segments = append(segments, sanitized) - } - } - for _, scope := range opts.Scope { - if sanitized := sanitizeSegment(scope); sanitized != "" { - segments = append(segments, sanitized) - } - } - segments = append(segments, ts.Format("2006/01/02")) - filename := fmt.Sprintf("%d-%06d-%s.bin", ts.Unix(), ts.Nanosecond()/1000, randHexSuffix(4)) - segments = append(segments, filename) - return path.Join(segments...) -} - -var sanitizeRe = regexp.MustCompile(`[^a-zA-Z0-9._-]+`) - -func sanitizeSegment(raw string) string { - trimmed := strings.TrimSpace(raw) - if trimmed == "" { - return "" - } - cleaned := sanitizeRe.ReplaceAllString(trimmed, "-") - cleaned = strings.Trim(cleaned, "-_.") - if cleaned == "" { - return "" - } - if len(cleaned) > 64 { - cleaned = cleaned[:64] - } - return strings.ToLower(cleaned) -} - -func randHexSuffix(n int) string { - b := make([]byte, n) - _, _ = crypto_rand.Read(b) - return hex.EncodeToString(b) -} - -func firstNonEmpty(values ...string) string { - for _, v := range values { - if strings.TrimSpace(v) != "" { - return v - } - } - return "" -} diff --git a/media/media_test.go b/media/media_test.go deleted file mode 100644 index 18f40d1..0000000 --- a/media/media_test.go +++ /dev/null @@ -1,151 +0,0 @@ -package media - -import ( - "strings" - "testing" - "time" -) - -func TestSanitizeSegment(t *testing.T) { - tests := []struct { - name string - in string - want string - }{ - {"empty", "", ""}, - {"whitespace only", " ", ""}, - {"simple", "hello", "hello"}, - {"uppercase", "Hello", "hello"}, - {"special chars", "foo/bar:baz", "foo-bar-baz"}, - {"unicode", "cafe\u0301", "cafe"}, - {"leading special", "---foo", "foo"}, - {"trailing special", "foo---", "foo"}, - {"all special", "@#$%", ""}, - {"dots and dashes", "a.b-c_d", "a.b-c_d"}, - {"long string truncated", strings.Repeat("a", 100), strings.Repeat("a", 64)}, - {"mixed", " My Story Run! ", "my-story-run"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := sanitizeSegment(tt.in) - if got != tt.want { - t.Errorf("sanitizeSegment(%q) = %q, want %q", tt.in, got, tt.want) - } - }) - } -} - -func TestBuildPath(t *testing.T) { - ts := time.Date(2025, 6, 15, 10, 30, 0, 500000000, time.UTC) - - tests := []struct { - name string - opts WriteOptions - contains []string - }{ - { - name: "full scope", - opts: WriteOptions{Namespace: "ns1", StoryRun: "sr1", Step: "step1"}, - contains: []string{ - "streams/", "ns1/", "sr1/", "step1/", "2025/06/15/", - }, - }, - { - name: "empty scope", - opts: WriteOptions{}, - contains: []string{ - "streams/", "2025/06/15/", - }, - }, - { - name: "with extra scope", - opts: WriteOptions{Scope: []string{"transport-a", "partition-0"}}, - contains: []string{ - "streams/", "transport-a/", "partition-0/", "2025/06/15/", - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - p := buildPath(tt.opts, ts) - for _, sub := range tt.contains { - if !strings.Contains(p, sub) { - t.Errorf("buildPath() = %q, missing %q", p, sub) - } - } - if strings.Contains(p, "\\") { - t.Errorf("buildPath() = %q, contains backslash", p) - } - if !strings.HasSuffix(p, ".bin") { - t.Errorf("buildPath() = %q, should end with .bin", p) - } - }) - } -} - -func TestBuildPathUniqueness(t *testing.T) { - ts := time.Date(2025, 6, 15, 10, 30, 0, 500000000, time.UTC) - opts := WriteOptions{Namespace: "ns", StoryRun: "sr", Step: "s"} - - seen := make(map[string]bool) - for i := range 100 { - p := buildPath(opts, ts) - if seen[p] { - t.Fatalf("collision on iteration %d: %q", i, p) - } - seen[p] = true - } -} - -func TestRandHexSuffix(t *testing.T) { - s := randHexSuffix(4) - if len(s) != 8 { - t.Errorf("randHexSuffix(4) length = %d, want 8", len(s)) - } - s2 := randHexSuffix(4) - if s == s2 { - t.Errorf("two calls returned same value: %q", s) - } -} - -func TestFirstNonEmpty(t *testing.T) { - tests := []struct { - name string - values []string - want string - }{ - {"first", []string{"a", "b"}, "a"}, - {"skip empty", []string{"", "b"}, "b"}, - {"skip whitespace", []string{" ", "b"}, "b"}, - {"all empty", []string{"", ""}, ""}, - {"none", nil, ""}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := firstNonEmpty(tt.values...) - if got != tt.want { - t.Errorf("firstNonEmpty(%v) = %q, want %q", tt.values, got, tt.want) - } - }) - } -} - -func TestInlineLimit(t *testing.T) { - tests := []struct { - name string - override int - want int - }{ - {"default", 0, defaultInlineLimit}, - {"override", 1024, 1024}, - {"negative ignored", -1, defaultInlineLimit}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := inlineLimit(tt.override) - if got != tt.want { - t.Errorf("inlineLimit(%d) = %d, want %d", tt.override, got, tt.want) - } - }) - } -} diff --git a/pkg/env/env.go b/pkg/env/env.go deleted file mode 100644 index e0b97b1..0000000 --- a/pkg/env/env.go +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Package env provides small helpers for parsing validated environment -// overrides used across the SDK runtime. -package env - -import ( - "log/slog" - "os" - "strconv" - "strings" - "time" -) - -// GetDuration parses a duration from an environment variable, returning defaultValue if unset or invalid. -func GetDuration(key string, defaultValue time.Duration) time.Duration { - valStr := strings.TrimSpace(os.Getenv(key)) - if valStr == "" { - return defaultValue - } - val, err := time.ParseDuration(valStr) - if err != nil || val <= 0 { - slog.Warn("ignoring invalid env var duration, using default", - "key", key, "value", valStr, "default", defaultValue) - return defaultValue - } - return val -} - -// GetInt parses an integer from an environment variable, returning defaultValue if unset or invalid. -func GetInt(key string, defaultValue int) int { - valStr := strings.TrimSpace(os.Getenv(key)) - if valStr == "" { - return defaultValue - } - val, err := strconv.Atoi(valStr) - if err != nil || val <= 0 { - slog.Warn("ignoring invalid env var integer, using default", - "key", key, "value", valStr, "default", defaultValue) - return defaultValue - } - return val -} diff --git a/pkg/env/env_test.go b/pkg/env/env_test.go deleted file mode 100644 index a888548..0000000 --- a/pkg/env/env_test.go +++ /dev/null @@ -1,81 +0,0 @@ -package env - -import ( - "bytes" - "log/slog" - "strings" - "testing" - "time" -) - -func TestGetDuration(t *testing.T) { - const key = "TEST_DURATION_ENV" - defaultValue := 5 * time.Second - - t.Run("unset uses default", func(t *testing.T) { - t.Setenv(key, "") - if got := GetDuration(key, defaultValue); got != defaultValue { - t.Fatalf("GetDuration() = %s, want %s", got, defaultValue) - } - }) - - t.Run("valid duration is parsed", func(t *testing.T) { - t.Setenv(key, "150ms") - if got := GetDuration(key, defaultValue); got != 150*time.Millisecond { - t.Fatalf("GetDuration() = %s, want 150ms", got) - } - }) - - t.Run("invalid duration warns and falls back", func(t *testing.T) { - t.Setenv(key, "not-a-duration") - - var buf bytes.Buffer - prev := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - defer slog.SetDefault(prev) - - if got := GetDuration(key, defaultValue); got != defaultValue { - t.Fatalf("GetDuration() = %s, want default %s", got, defaultValue) - } - if out := buf.String(); out == "" || !strings.Contains(out, key) { - t.Fatalf("expected warning log for invalid duration, got %q", out) - } - }) -} - -func TestGetInt(t *testing.T) { - const key = "TEST_INT_ENV" - defaultValue := 5 - - t.Run("unset uses default", func(t *testing.T) { - t.Setenv(key, "") - if got := GetInt(key, defaultValue); got != defaultValue { - t.Fatalf("GetInt() = %d, want %d", got, defaultValue) - } - }) - - t.Run("valid int is parsed", func(t *testing.T) { - t.Setenv(key, "42") - if got := GetInt(key, defaultValue); got != 42 { - t.Fatalf("GetInt() = %d, want 42", got) - } - }) - - t.Run("invalid int warns and falls back", func(t *testing.T) { - t.Setenv(key, "-1") - - var buf bytes.Buffer - prev := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - defer slog.SetDefault(prev) - - if got := GetInt(key, defaultValue); got != defaultValue { - t.Fatalf("GetInt() = %d, want default %d", got, defaultValue) - } - if out := buf.String(); out == "" || !strings.Contains(out, key) { - t.Fatalf("expected warning log for invalid int, got %q", out) - } - }) -} diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index 7563f53..02d7008 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -1,34 +1,10 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package errors import "errors" -// ErrRetryable marks an operation failure as safe to retry. -var ErrRetryable = errors.New("retryable") - -// ErrConflict marks an optimistic-concurrency or resource-version conflict. -var ErrConflict = errors.New("conflict") - -// ErrNotFound marks that the requested resource or object does not exist. -var ErrNotFound = errors.New("not found") - -// ErrNilContext marks that a required context argument was nil. -var ErrNilContext = errors.New("nil context") - -// ErrInvalidTransition marks that a requested state transition is not allowed. -var ErrInvalidTransition = errors.New("invalid phase transition") +var ( + ErrRetryable = errors.New("retryable") + ErrConflict = errors.New("conflict") + ErrNotFound = errors.New("not found") + ErrNilContext = errors.New("nil context") +) diff --git a/pkg/errors/errors_test.go b/pkg/errors/errors_test.go deleted file mode 100644 index 29ac909..0000000 --- a/pkg/errors/errors_test.go +++ /dev/null @@ -1,32 +0,0 @@ -package errors - -import ( - stderrors "errors" - "fmt" - "testing" -) - -func TestSentinelIdentity(t *testing.T) { - tests := []struct { - name string - err error - }{ - {name: "retryable", err: ErrRetryable}, - {name: "conflict", err: ErrConflict}, - {name: "not found", err: ErrNotFound}, - {name: "nil context", err: ErrNilContext}, - {name: "invalid transition", err: ErrInvalidTransition}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if !stderrors.Is(tt.err, tt.err) { - t.Fatalf("expected sentinel %q to match itself via errors.Is", tt.name) - } - wrapped := fmt.Errorf("wrap: %w", tt.err) - if !stderrors.Is(wrapped, tt.err) { - t.Fatalf("expected wrapped sentinel %q to remain detectable via errors.Is", tt.name) - } - }) - } -} diff --git a/pkg/kube/apply/apply.go b/pkg/kube/apply/apply.go new file mode 100644 index 0000000..8ab7316 --- /dev/null +++ b/pkg/kube/apply/apply.go @@ -0,0 +1,23 @@ +package apply + +import ( + "context" + + sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// FieldManager is the stable SSA field manager for this SDK. +const FieldManager = "bubu-sdk-go" + +// Apply performs Server-Side Apply with a stable field manager. +func Apply(ctx context.Context, c client.Client, obj client.Object, force bool) error { + if ctx == nil { + return sdkerrors.ErrNilContext + } + obj.SetManagedFields(nil) + return c.Patch(ctx, obj, client.Apply, &client.PatchOptions{ + FieldManager: FieldManager, + Force: &force, + }) +} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index f741b30..e4b3b5e 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -1,39 +1,17 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - // Package metrics provides OpenTelemetry-based observability for the SDK and allows // developers to register custom metrics for their engrams and impulses. package metrics import ( "context" - "errors" - "fmt" - "log" - "sync" - "github.com/bubustack/bubu-sdk-go/pkg/observability" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" - metricnoop "go.opentelemetry.io/otel/metric/noop" ) var ( - meter metric.Meter = metricnoop.NewMeterProvider().Meter("bubu-sdk") + meter metric.Meter // SDK built-in metrics hydrationSizeBytes metric.Int64Histogram @@ -49,187 +27,94 @@ var ( clientBufferFlushes metric.Int64Counter // Streaming reconnect metrics - streamReconnectAttempts metric.Int64Counter - streamReconnectFailures metric.Int64Counter - streamBackpressureTimeouts metric.Int64Counter - - metricsDisabled bool - - clientBufferObserversMu sync.RWMutex - clientBufferObserverSeq int64 - clientBufferSizeObservers = make(map[int64]func() float64) - clientBufferBytesObservers = make(map[int64]func() float64) + streamReconnectAttempts metric.Int64Counter + streamReconnectFailures metric.Int64Counter ) func init() { - if err := initializeMetrics(); err != nil { - log.Printf("bubu sdk: metrics disabled during initialization: %v", err) - } -} + meter = otel.Meter("bubu-sdk") -func initializeMetrics() error { - if !observability.MetricsEnabled() { - disableMetrics() - return nil - } - return initializeMetricsWithMeter(otel.Meter("bubu-sdk")) -} - -func initializeMetricsWithMeter(m metric.Meter) error { - resetMetricInstruments() - meter = m - metricsDisabled = false - - var errs []error var err error hydrationSizeBytes, err = meter.Int64Histogram( "bubu.storage.hydration.size_bytes", metric.WithDescription("Size of hydrated data in bytes"), metric.WithUnit("By"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.storage.hydration.size_bytes: %w", err)) - } + _ = err + // If instrument creation fails, continue without panicking dehydrationSizeBytes, err = meter.Int64Histogram( "bubu.storage.dehydration.size_bytes", metric.WithDescription("Size of dehydrated data in bytes"), metric.WithUnit("By"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.storage.dehydration.size_bytes: %w", err)) - } + _ = err streamThroughput, err = meter.Int64Counter( "bubu.stream.messages_total", metric.WithDescription("Total messages processed in stream"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.messages_total: %w", err)) - } + _ = err k8sOperationDuration, err = meter.Float64Histogram( "bubu.k8s.operation.duration_seconds", metric.WithDescription("Duration of Kubernetes API operations"), metric.WithUnit("s"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.k8s.operation.duration_seconds: %w", err)) - } + _ = err // Reconnect attempt/failure counters for streaming client streamReconnectAttempts, err = meter.Int64Counter( "bubu.stream.reconnect.attempts_total", metric.WithDescription("Total reconnect attempts by SDK client"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.reconnect.attempts_total: %w", err)) - } + _ = err streamReconnectFailures, err = meter.Int64Counter( "bubu.stream.reconnect.failures_total", metric.WithDescription("Total reconnect failures (terminal) by SDK client"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.reconnect.failures_total: %w", err)) - } + _ = err // Client buffer: size/bytes gauges are registered by the buffer with callbacks clientBufferDrops, err = meter.Int64Counter( "bubu.stream.client_buffer.dropped_total", metric.WithDescription("Total messages dropped by SDK client buffer (oversize/overflow)"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.client_buffer.dropped_total: %w", err)) - } + _ = err clientBufferFlushes, err = meter.Int64Counter( "bubu.stream.client_buffer.flushed_total", metric.WithDescription("Total messages flushed from SDK client buffer after reconnect"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.client_buffer.flushed_total: %w", err)) - } + _ = err clientBufferAdds, err = meter.Int64Counter( "bubu.stream.client_buffer.added_total", metric.WithDescription("Total messages added to SDK client buffer due to transient errors"), ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.client_buffer.added_total: %w", err)) - } - - streamBackpressureTimeouts, err = meter.Int64Counter( - "bubu.stream.backpressure.timeouts_total", - metric.WithDescription("Total timeouts encountered while delivering stream messages due to backpressure"), - ) - if err != nil { - errs = append(errs, fmt.Errorf("create bubu.stream.backpressure.timeouts_total: %w", err)) - } - - if len(errs) > 0 { - disableMetrics() - return errors.Join(errs...) - } - return nil -} - -func disableMetrics() { - resetMetricInstruments() - meter = metricnoop.NewMeterProvider().Meter("bubu-sdk") - metricsDisabled = true -} - -func resetMetricInstruments() { - var ( - int64Histogram metric.Int64Histogram - int64Counter metric.Int64Counter - float64Histogram metric.Float64Histogram - float64Observable metric.Float64ObservableGauge - ) - hydrationSizeBytes = int64Histogram - dehydrationSizeBytes = int64Histogram - streamThroughput = int64Counter - k8sOperationDuration = float64Histogram - clientBufferAdds = int64Counter - clientBufferSizeGauge = float64Observable - clientBufferBytesGauge = float64Observable - clientBufferDrops = int64Counter - clientBufferFlushes = int64Counter - streamReconnectAttempts = int64Counter - streamReconnectFailures = int64Counter - streamBackpressureTimeouts = int64Counter + _ = err } // RecordHydrationSize records the size of hydrated data for observability. -func RecordHydrationSize(ctx context.Context, sizeBytes int64) { - if metricsDisabled { - return - } - hydrationSizeBytes.Record(ctx, sizeBytes) +func RecordHydrationSize(ctx context.Context, sizeBytes int64, stepRunID string) { + hydrationSizeBytes.Record(ctx, sizeBytes, + metric.WithAttributes(attribute.String("steprun_id", stepRunID))) } // RecordDehydrationSize records the size of dehydrated data for observability. -func RecordDehydrationSize(ctx context.Context, sizeBytes int64) { - if metricsDisabled { - return - } - dehydrationSizeBytes.Record(ctx, sizeBytes) +func RecordDehydrationSize(ctx context.Context, sizeBytes int64, stepRunID string) { + dehydrationSizeBytes.Record(ctx, sizeBytes, + metric.WithAttributes(attribute.String("steprun_id", stepRunID))) } // RecordStreamMessage increments the stream message counter. // Direction should be "received" or "sent". func RecordStreamMessage(ctx context.Context, direction string) { - if metricsDisabled { - return - } streamThroughput.Add(ctx, 1, metric.WithAttributes(attribute.String("direction", direction))) } // RecordK8sOperation records the duration of a Kubernetes API operation. func RecordK8sOperation(ctx context.Context, operation string, durationSec float64, success bool) { - if metricsDisabled { - return - } k8sOperationDuration.Record(ctx, durationSec, metric.WithAttributes( attribute.String("operation", operation), @@ -241,130 +126,57 @@ func RecordK8sOperation(ctx context.Context, operation string, durationSec float // RegisterClientBufferGauges registers async gauges for buffer size and bytes. // The callbacks will be polled by the OTel SDK. -func RegisterClientBufferGauges(sizeFn func() float64, bytesFn func() float64) func() { - if metricsDisabled { - return func() {} - } - clientBufferObserversMu.Lock() - defer clientBufferObserversMu.Unlock() - if clientBufferSizeGauge == nil { - clientBufferSizeGauge, _ = meter.Float64ObservableGauge( - "bubu.stream.client_buffer.current_size", - metric.WithDescription("Current number of messages buffered in SDK client"), - metric.WithUnit("{messages}"), - metric.WithFloat64Callback(observeClientBufferSizes), - ) - } - if clientBufferBytesGauge == nil { - clientBufferBytesGauge, _ = meter.Float64ObservableGauge( - "bubu.stream.client_buffer.current_bytes", - metric.WithDescription("Current total bytes buffered in SDK client"), - metric.WithUnit("By"), - metric.WithFloat64Callback(observeClientBufferBytes), - ) - } - id := clientBufferObserverSeq - clientBufferObserverSeq++ - clientBufferSizeObservers[id] = sizeFn - clientBufferBytesObservers[id] = bytesFn - return func() { - clientBufferObserversMu.Lock() - delete(clientBufferSizeObservers, id) - delete(clientBufferBytesObservers, id) - clientBufferObserversMu.Unlock() - } -} - -// observeClientBufferSizes iterates all registered client buffer size -// callbacks under an RLock and records their values via the provided OTel -// observer (`pkg/metrics/metrics.go:179-223`). -func observeClientBufferSizes(_ context.Context, obs metric.Float64Observer) error { - clientBufferObserversMu.RLock() - defer clientBufferObserversMu.RUnlock() - for id, fn := range clientBufferSizeObservers { - if fn == nil { - continue - } - func(observerID int64, sizeFn func() float64) { - defer func() { - if r := recover(); r != nil { - log.Printf("bubu sdk: client buffer size callback %d panicked: %v", observerID, r) - } - }() +func RegisterClientBufferGauges(sizeFn func() float64, bytesFn func() float64) { + // Best-effort registration; ignore errors to avoid impacting runtime + clientBufferSizeGauge, _ = meter.Float64ObservableGauge( + "bubu.stream.client_buffer.current_size", + metric.WithDescription("Current number of messages buffered in SDK client"), + metric.WithUnit("{messages}"), + metric.WithFloat64Callback(func(_ context.Context, obs metric.Float64Observer) error { obs.Observe(sizeFn()) - }(id, fn) - } - return nil -} - -func observeClientBufferBytes(_ context.Context, obs metric.Float64Observer) error { - clientBufferObserversMu.RLock() - defer clientBufferObserversMu.RUnlock() - for id, fn := range clientBufferBytesObservers { - if fn == nil { - continue - } - func(observerID int64, bytesFn func() float64) { - defer func() { - if r := recover(); r != nil { - log.Printf("bubu sdk: client buffer bytes callback %d panicked: %v", observerID, r) - } - }() + return nil + }), + ) + clientBufferBytesGauge, _ = meter.Float64ObservableGauge( + "bubu.stream.client_buffer.current_bytes", + metric.WithDescription("Current total bytes buffered in SDK client"), + metric.WithUnit("By"), + metric.WithFloat64Callback(func(_ context.Context, obs metric.Float64Observer) error { obs.Observe(bytesFn()) - }(id, fn) - } - return nil + return nil + }), + ) + _ = clientBufferSizeGauge + _ = clientBufferBytesGauge } // RecordClientBufferDrop increments drop counter with a reason label. func RecordClientBufferDrop(ctx context.Context, reason string) { - if metricsDisabled { - return - } - clientBufferDrops.Add(ctx, 1, metric.WithAttributes(attribute.String("reason", reason))) + clientBufferDrops.Add(ctx, 1) } // RecordClientBufferFlush increments flush counter by count. func RecordClientBufferFlush(ctx context.Context, count int) { - if metricsDisabled || count <= 0 { - return + if count > 0 { + clientBufferFlushes.Add(ctx, int64(count)) } - clientBufferFlushes.Add(ctx, int64(count)) } // RecordClientBufferAdded increments add counter with a reason label. func RecordClientBufferAdded(ctx context.Context, reason string) { - if metricsDisabled { - return - } clientBufferAdds.Add(ctx, 1, metric.WithAttributes(attribute.String("reason", reason))) } // Stream reconnect metrics helpers -// RecordStreamReconnectAttempt increments the reconnect attempts counter with reason labeling. -func RecordStreamReconnectAttempt(ctx context.Context, reason string) { - if metricsDisabled { - return - } - streamReconnectAttempts.Add(ctx, 1, metric.WithAttributes(attribute.String("reason", reason))) +// RecordStreamReconnectAttempt increments the reconnect attempts counter. +func RecordStreamReconnectAttempt(ctx context.Context) { + streamReconnectAttempts.Add(ctx, 1) } -// RecordStreamReconnectFailure increments the reconnect failures counter (terminal) with reason labeling. -func RecordStreamReconnectFailure(ctx context.Context, reason string) { - if metricsDisabled { - return - } - streamReconnectFailures.Add(ctx, 1, metric.WithAttributes(attribute.String("reason", reason))) -} - -// RecordStreamBackpressureTimeout records occurrences of backpressure timeouts. -// Stage should denote where the timeout happened (e.g., "client_receiver", "server_reader"). -func RecordStreamBackpressureTimeout(ctx context.Context, stage string) { - if metricsDisabled { - return - } - streamBackpressureTimeouts.Add(ctx, 1, metric.WithAttributes(attribute.String("stage", stage))) +// RecordStreamReconnectFailure increments the reconnect failures counter (terminal). +func RecordStreamReconnectFailure(ctx context.Context) { + streamReconnectFailures.Add(ctx, 1) } // Custom metrics API for developers @@ -399,14 +211,7 @@ func Gauge(name, description, unit string, callback func() float64) error { metric.WithDescription(description), metric.WithUnit(unit), metric.WithFloat64Callback(func(_ context.Context, obs metric.Float64Observer) error { - func() { - defer func() { - if r := recover(); r != nil { - log.Printf("bubu sdk: gauge %q callback panicked: %v", name, r) - } - }() - obs.Observe(callback()) - }() + obs.Observe(callback()) return nil })) return err diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go deleted file mode 100644 index 76b0c19..0000000 --- a/pkg/metrics/metrics_test.go +++ /dev/null @@ -1,85 +0,0 @@ -package metrics - -import ( - "context" - "errors" - "testing" - - "go.opentelemetry.io/otel/metric" - metricnoop "go.opentelemetry.io/otel/metric/noop" -) - -type failingMeter struct { - metricnoop.Meter -} - -func (failingMeter) Int64Histogram(name string, options ...metric.Int64HistogramOption) (metric.Int64Histogram, error) { - return nil, errors.New("boom") -} - -func TestInitializeMetricsWithMeter_DisablesMetricsOnInstrumentError(t *testing.T) { - prevMeter := meter - prevDisabled := metricsDisabled - prevHydration := hydrationSizeBytes - prevDehydration := dehydrationSizeBytes - prevStream := streamThroughput - prevK8s := k8sOperationDuration - prevAdds := clientBufferAdds - prevSizeGauge := clientBufferSizeGauge - prevBytesGauge := clientBufferBytesGauge - prevDrops := clientBufferDrops - prevFlushes := clientBufferFlushes - prevReconnectAttempts := streamReconnectAttempts - prevReconnectFailures := streamReconnectFailures - prevBackpressure := streamBackpressureTimeouts - t.Cleanup(func() { - meter = prevMeter - metricsDisabled = prevDisabled - hydrationSizeBytes = prevHydration - dehydrationSizeBytes = prevDehydration - streamThroughput = prevStream - k8sOperationDuration = prevK8s - clientBufferAdds = prevAdds - clientBufferSizeGauge = prevSizeGauge - clientBufferBytesGauge = prevBytesGauge - clientBufferDrops = prevDrops - clientBufferFlushes = prevFlushes - streamReconnectAttempts = prevReconnectAttempts - streamReconnectFailures = prevReconnectFailures - streamBackpressureTimeouts = prevBackpressure - }) - - err := initializeMetricsWithMeter(failingMeter{}) - if err == nil { - t.Fatal("expected metrics initialization to fail") - } - if !metricsDisabled { - t.Fatal("expected metrics to be disabled after initialization failure") - } - - RecordHydrationSize(context.Background(), 1) - RecordStreamMessage(context.Background(), "sent") -} - -func TestInitializeMetricsWithMeter_SucceedsWithNoopMeter(t *testing.T) { - prevMeter := meter - prevDisabled := metricsDisabled - t.Cleanup(func() { - meter = prevMeter - metricsDisabled = prevDisabled - }) - - err := initializeMetricsWithMeter(metricnoop.NewMeterProvider().Meter("test")) - if err != nil { - t.Fatalf("expected metrics initialization to succeed: %v", err) - } - if metricsDisabled { - t.Fatal("expected metrics to remain enabled") - } - - unregister := RegisterClientBufferGauges(func() float64 { return 1 }, func() float64 { return 2 }) - if unregister == nil { - t.Fatal("expected unregister callback") - } - unregister() -} diff --git a/pkg/observability/observability.go b/pkg/observability/observability.go deleted file mode 100644 index 5e3ad6d..0000000 --- a/pkg/observability/observability.go +++ /dev/null @@ -1,127 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package observability - -import ( - "log/slog" - "os" - "strings" - "sync" - - "github.com/bubustack/core/contracts" - "github.com/bubustack/core/runtime/featuretoggles" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/propagation" - "go.opentelemetry.io/otel/trace" - "go.opentelemetry.io/otel/trace/noop" -) - -var ( - initOnce sync.Once - metricsEnabled bool - tracingEnabled bool - tracePropagationEnabled bool - prop propagation.TextMapPropagator - noopTracerProvider = noop.NewTracerProvider() -) - -func resetConfigForTesting() { - initOnce = sync.Once{} - metricsEnabled = false - tracingEnabled = false - tracePropagationEnabled = false - prop = nil -} - -func initConfig() { - toggles := featuretoggles.Features{ - MetricsEnabled: parseBoolEnv(contracts.SDKMetricsEnabledEnv, true), - TelemetryEnabled: parseBoolEnv(contracts.SDKTracingEnabledEnv, true), - TracePropagationEnabled: parseBoolEnv(contracts.TracePropagationEnv, true), - } - - featuretoggles.Apply(toggles, featuretoggles.Sink{ - EnableTelemetry: func(enabled bool) { - tracingEnabled = enabled - }, - EnableTracePropagation: func(enabled bool) { - tracePropagationEnabled = enabled - if enabled { - prop = propagation.NewCompositeTextMapPropagator( - propagation.TraceContext{}, - propagation.Baggage{}, - ) - } else { - prop = propagation.NewCompositeTextMapPropagator() - } - }, - EnableMetrics: func(enabled bool) { - metricsEnabled = enabled - }, - }) -} - -// MetricsEnabled reports whether SDK metrics emission is enabled. Defaults to true. -func MetricsEnabled() bool { - initOnce.Do(initConfig) - return metricsEnabled -} - -// TracingEnabled reports whether SDK tracing is enabled. Defaults to true. -func TracingEnabled() bool { - initOnce.Do(initConfig) - return tracingEnabled -} - -// TracePropagationEnabled reports whether OTEL propagators are active. -func TracePropagationEnabled() bool { - initOnce.Do(initConfig) - return tracePropagationEnabled -} - -// Propagator returns the configured propagator without mutating the process-global OTEL state. -func Propagator() propagation.TextMapPropagator { - initOnce.Do(initConfig) - if prop != nil { - return prop - } - return otel.GetTextMapPropagator() -} - -// Tracer returns a trace.Tracer that honors the tracing toggle. -func Tracer(name string) trace.Tracer { - if !TracingEnabled() { - return noopTracerProvider.Tracer(name) - } - return otel.Tracer(name) -} - -func parseBoolEnv(key string, def bool) bool { - val, ok := os.LookupEnv(key) - if !ok || val == "" { - return def - } - switch strings.ToLower(strings.TrimSpace(val)) { - case "1", "true", "t", "yes", "y", "on": - return true - case "0", "false", "f", "no", "n", "off": - return false - default: - slog.Default().Warn("ignoring invalid observability env override", "env", key, "value", val, "default", def) - return def - } -} diff --git a/pkg/observability/observability_test.go b/pkg/observability/observability_test.go deleted file mode 100644 index aaf4bb8..0000000 --- a/pkg/observability/observability_test.go +++ /dev/null @@ -1,221 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package observability - -import ( - "bytes" - "context" - "log/slog" - "os" - "strings" - "testing" - - "github.com/bubustack/core/contracts" - "go.opentelemetry.io/otel/baggage" - "go.opentelemetry.io/otel/propagation" - "go.opentelemetry.io/otel/trace" -) - -func resetForTest(t *testing.T) { - t.Helper() - resetConfigForTesting() -} - -func TestMetricsEnabledFromEnv(t *testing.T) { - t.Setenv(contracts.SDKMetricsEnabledEnv, "false") - resetForTest(t) - if MetricsEnabled() { - t.Fatalf("expected metrics disabled via env") - } -} - -func TestTracingEnabledDefault(t *testing.T) { - _ = os.Unsetenv(contracts.SDKTracingEnabledEnv) - resetForTest(t) - if !TracingEnabled() { - t.Fatalf("expected tracing enabled by default") - } -} - -func TestTracePropagationEnabledInjectsHeadersWhenEnabled(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "true") - resetForTest(t) - - if !TracePropagationEnabled() { - t.Fatalf("expected trace propagation enabled") - } - - traceID, err := trace.TraceIDFromHex("00112233445566778899aabbccddeeff") - if err != nil { - t.Fatalf("failed to create trace ID: %v", err) - } - spanID, err := trace.SpanIDFromHex("0011223344556677") - if err != nil { - t.Fatalf("failed to create span ID: %v", err) - } - - ctx := trace.ContextWithSpanContext(context.Background(), trace.NewSpanContext(trace.SpanContextConfig{ - TraceID: traceID, - SpanID: spanID, - TraceFlags: trace.FlagsSampled, - })) - member, err := baggage.NewMember("tenant", "engram") - if err != nil { - t.Fatalf("failed to create baggage member: %v", err) - } - bg, err := baggage.New(member) - if err != nil { - t.Fatalf("failed to create baggage: %v", err) - } - ctx = baggage.ContextWithBaggage(ctx, bg) - - carrier := propagation.MapCarrier{} - Propagator().Inject(ctx, carrier) - - if carrier.Get("traceparent") == "" { - t.Fatalf("expected traceparent header to be injected when propagation is enabled") - } - if carrier.Get("baggage") == "" { - t.Fatalf("expected baggage header to be injected when propagation is enabled") - } -} - -func TestTracePropagationDisabledSkipsHeaderInjection(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "false") - resetForTest(t) - - if TracePropagationEnabled() { - t.Fatalf("expected trace propagation disabled") - } - - traceID, err := trace.TraceIDFromHex("00112233445566778899aabbccddeeff") - if err != nil { - t.Fatalf("failed to create trace ID: %v", err) - } - spanID, err := trace.SpanIDFromHex("0011223344556677") - if err != nil { - t.Fatalf("failed to create span ID: %v", err) - } - ctx := trace.ContextWithSpanContext(context.Background(), trace.NewSpanContext(trace.SpanContextConfig{ - TraceID: traceID, - SpanID: spanID, - TraceFlags: trace.FlagsSampled, - })) - - carrier := propagation.MapCarrier{} - Propagator().Inject(ctx, carrier) - - if carrier.Get("traceparent") != "" { - t.Fatalf("did not expect traceparent header when propagation is disabled") - } - if carrier.Get("baggage") != "" { - t.Fatalf("did not expect baggage header when propagation is disabled") - } -} - -func TestTracePropagationEnabledRoundTripExtractsTraceAndBaggage(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "true") - resetForTest(t) - - if !TracePropagationEnabled() { - t.Fatalf("expected trace propagation enabled") - } - - traceID, err := trace.TraceIDFromHex("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") - if err != nil { - t.Fatalf("failed to create trace ID: %v", err) - } - spanID, err := trace.SpanIDFromHex("bbbbbbbbbbbbbbbb") - if err != nil { - t.Fatalf("failed to create span ID: %v", err) - } - - ctx := trace.ContextWithSpanContext(context.Background(), trace.NewSpanContext(trace.SpanContextConfig{ - TraceID: traceID, - SpanID: spanID, - TraceFlags: trace.FlagsSampled, - })) - member, err := baggage.NewMember("tenant", "engram") - if err != nil { - t.Fatalf("failed to create baggage member: %v", err) - } - bg, err := baggage.New(member) - if err != nil { - t.Fatalf("failed to create baggage: %v", err) - } - ctx = baggage.ContextWithBaggage(ctx, bg) - - carrier := propagation.MapCarrier{} - Propagator().Inject(ctx, carrier) - extracted := Propagator().Extract(context.Background(), carrier) - - extractedSpan := trace.SpanContextFromContext(extracted) - if !extractedSpan.IsValid() { - t.Fatalf("expected extracted span context to be valid") - } - if extractedSpan.TraceID() != traceID { - t.Fatalf("trace ID mismatch: got %s want %s", extractedSpan.TraceID(), traceID) - } - if extractedSpan.SpanID() != spanID { - t.Fatalf("span ID mismatch: got %s want %s", extractedSpan.SpanID(), spanID) - } - if got := baggage.FromContext(extracted).Member("tenant").Value(); got != "engram" { - t.Fatalf("baggage mismatch: got %q want %q", got, "engram") - } -} - -func TestTracePropagationDisabledIgnoresInboundHeadersOnExtract(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "false") - resetForTest(t) - - if TracePropagationEnabled() { - t.Fatalf("expected trace propagation disabled") - } - - carrier := propagation.MapCarrier{ - "traceparent": "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01", - "baggage": "tenant=engram", - } - - extracted := Propagator().Extract(context.Background(), carrier) - if trace.SpanContextFromContext(extracted).IsValid() { - t.Fatalf("did not expect valid extracted span context when propagation is disabled") - } - if members := baggage.FromContext(extracted).Members(); len(members) != 0 { - t.Fatalf("did not expect extracted baggage when propagation is disabled") - } -} - -func TestParseBoolEnvWarnsOnInvalidValue(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "definitely-not-bool") - resetForTest(t) - - var buf bytes.Buffer - prevLogger := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - t.Cleanup(func() { - slog.SetDefault(prevLogger) - }) - - if !TracePropagationEnabled() { - t.Fatalf("expected invalid value to fall back to default true") - } - if got := buf.String(); got == "" || !strings.Contains(got, contracts.TracePropagationEnv) { - t.Fatalf("expected warning log for invalid env, got %q", got) - } -} diff --git a/runtime/context.go b/runtime/context.go index b9c4f0f..b9a08a1 100644 --- a/runtime/context.go +++ b/runtime/context.go @@ -1,25 +1,8 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package runtime import ( "encoding/json" "fmt" - "log/slog" "os" "reflect" "strconv" @@ -28,310 +11,160 @@ import ( "github.com/bubustack/bobrapet/pkg/refs" "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/core/contracts" "github.com/mitchellh/mapstructure" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const runtimeStrictUnmarshalEnv = "BUBU_RUNTIME_STRICT_UNMARSHAL" - // ExecutionContextData is the structure of the data provided by the // bobrapet controller to the SDK. type ExecutionContextData struct { - // Inputs carries the hydrated step inputs visible to the current engram. - Inputs map[string]any `json:"inputs"` - // Config carries the hydrated static configuration for the current engram. - Config map[string]any `json:"config"` - // Secrets contains the secret descriptors or literal values injected for the step. - Secrets map[string]string `json:"secrets"` - // StoryInfo identifies the current Story, StoryRun, Step, and StepRun. - StoryInfo engram.StoryInfo `json:"storyInfo"` - // CELContext exposes controller-provided CEL inputs and prior step outputs. - CELContext map[string]any `json:"celContext,omitempty"` - // Transports lists the declared story transports available to the runtime. - Transports []engram.TransportDescriptor `json:"transports,omitempty"` - // StoryRef identifies the Story resource when the controller provides it explicitly. - StoryRef *refs.StoryReference `json:"storyRef,omitempty"` - // StartedAt records when the controller says the current execution began. - StartedAt metav1.Time `json:"startedAt"` - // Storage configures the shared object storage backend, when enabled. - Storage *StorageConfig `json:"storage,omitempty"` - // Execution carries runtime tuning such as timeouts and inline-size limits. - Execution ExecutionInfo `json:"execution"` + Inputs map[string]any `json:"inputs"` + Config map[string]any `json:"config"` + Secrets map[string]string `json:"secrets"` + StoryInfo engram.StoryInfo `json:"storyInfo"` + StoryRef *refs.StoryReference `json:"storyRef,omitempty"` + StartedAt metav1.Time `json:"startedAt"` + Storage *StorageConfig `json:"storage,omitempty"` + Execution ExecutionInfo `json:"execution"` } // ExecutionInfo holds runtime parameters for the current step execution. type ExecutionInfo struct { - // Mode is the controller-selected execution mode for the step. - Mode string - // StepTimeout is the maximum runtime allowed for the step. - StepTimeout time.Duration - // MaxInlineSize is the maximum payload size kept inline before offload. + Mode string + StepTimeout time.Duration MaxInlineSize int - // GRPCPort is the listening port used by gRPC-based runtimes. - GRPCPort int + GRPCPort int } // StorageConfig holds the configuration for object storage. type StorageConfig struct { - // Provider identifies the backing storage implementation (for example, `s3`). Provider string - // S3 carries provider-specific settings when Provider is `s3`. - S3 *S3StorageConfig - // Timeout bounds storage read/write operations when set. - Timeout time.Duration + S3 *S3StorageConfig + Timeout time.Duration } // S3StorageConfig holds S3-specific storage configuration. type S3StorageConfig struct { - // Bucket is the target S3 bucket name. - Bucket string - // Region is the AWS region for the bucket. - Region string - // Endpoint overrides the default S3 endpoint when set. + Bucket string + Region string Endpoint string } // LoadExecutionContextData loads the execution context from environment variables. func LoadExecutionContextData() (*ExecutionContextData, error) { - data := &ExecutionContextData{ - Inputs: make(map[string]any), - Config: make(map[string]any), - Secrets: make(map[string]string), - } - - data.StoryInfo = loadStoryInfoFromEnv() - data.Execution.Mode = os.Getenv(contracts.ExecutionModeEnv) - data.StartedAt = parseStartedAtFromEnv() + startedAt := parseStartedAtFromEnv() + execCtxData := buildBaseExecutionContextData(startedAt) - if inputs, err := loadJSONMapEnv(contracts.TriggerDataEnv); err != nil { + if err := fillInputsFromEnv(execCtxData); err != nil { return nil, err - } else if len(inputs) > 0 { - data.Inputs = inputs } - - if config, err := loadJSONMapEnv(contracts.StepConfigEnv); err != nil { - return nil, err - } else if len(config) > 0 { - data.Config = config - } - if celCtx, err := loadJSONMapEnv(contracts.TemplateContextEnv); err != nil { + if err := fillConfigFromEnvJSON(execCtxData); err != nil { return nil, err - } else if len(celCtx) > 0 { - data.CELContext = celCtx - } - - if transports, err := loadTransportsFromEnv(); err != nil { - return nil, err - } else if len(transports) > 0 { - data.Transports = transports - } - - applyConfigAndSecretOverrides(data) - applyExecutionSettingsFromEnv(&data.Execution) - data.Storage = loadStorageConfigFromEnv() - - return data, nil -} - -func loadStoryInfoFromEnv() engram.StoryInfo { - return engram.StoryInfo{ - StoryName: os.Getenv(contracts.StoryNameEnv), - StoryRunID: os.Getenv(contracts.StoryRunIDEnv), - StepName: os.Getenv(contracts.StepNameEnv), - StepRunID: os.Getenv(contracts.StepRunNameEnv), - StepRunNamespace: os.Getenv(contracts.StepRunNamespaceEnv), } + fillExecutionInfoFromEnv(execCtxData) + fillStorageConfigFromEnv(execCtxData) + applyOverridesFromEnv(execCtxData) + return execCtxData, nil } func parseStartedAtFromEnv() metav1.Time { - if startedAtStr := os.Getenv(contracts.StartedAtEnv); startedAtStr != "" { + if startedAtStr := os.Getenv("BUBU_STARTED_AT"); startedAtStr != "" { + if t, err := time.Parse(time.RFC3339Nano, startedAtStr); err == nil { + return metav1.NewTime(t.UTC()) + } if t, err := time.Parse(time.RFC3339, startedAtStr); err == nil { - return metav1.Time{Time: t} + return metav1.NewTime(t.UTC()) } } - return metav1.NewTime(time.Now()) + return metav1.Now() } -func loadJSONMapEnv(key string) (map[string]any, error) { - raw := os.Getenv(key) - if strings.TrimSpace(raw) == "" { - return nil, nil - } - var payload map[string]any - if err := json.Unmarshal([]byte(raw), &payload); err != nil { - return nil, fmt.Errorf("failed to unmarshal %s: %w", key, err) +func buildBaseExecutionContextData(startedAt metav1.Time) *ExecutionContextData { + return &ExecutionContextData{ + Inputs: make(map[string]any), + Config: make(map[string]any), + Secrets: make(map[string]string), + StoryInfo: engram.StoryInfo{ + StoryName: os.Getenv("BUBU_STORY_NAME"), + StoryRunID: os.Getenv("BUBU_STORYRUN_ID"), + StepName: os.Getenv("BUBU_STEP_NAME"), + StepRunID: os.Getenv("BUBU_STEPRUN_NAME"), + StepRunNamespace: os.Getenv("BUBU_STEPRUN_NAMESPACE"), + }, + StartedAt: startedAt, } - return payload, nil } -func loadTransportsFromEnv() ([]engram.TransportDescriptor, error) { - raw := strings.TrimSpace(os.Getenv(contracts.TransportsEnv)) - if raw == "" { - return nil, nil - } - var entries []map[string]any - if err := json.Unmarshal([]byte(raw), &entries); err != nil { - return nil, fmt.Errorf("failed to unmarshal %s: %w", contracts.TransportsEnv, err) - } - transports := make([]engram.TransportDescriptor, 0, len(entries)) - seenNames := make(map[string]struct{}, len(entries)) - for i, entry := range entries { - td := engram.TransportDescriptor{} - if name, ok := entry["name"].(string); ok { - td.Name = strings.TrimSpace(name) - } - if kind, ok := entry["kind"].(string); ok { - td.Kind = strings.TrimSpace(kind) - } - if mode, ok := entry["mode"].(string); ok { - td.Mode = strings.TrimSpace(mode) - } - if td.Name == "" { - return nil, fmt.Errorf("invalid %s[%d]: name is required", contracts.TransportsEnv, i) - } - if td.Kind == "" { - return nil, fmt.Errorf("invalid %s[%d]: kind is required", contracts.TransportsEnv, i) - } - if _, exists := seenNames[td.Name]; exists { - return nil, fmt.Errorf("invalid %s[%d]: duplicate transport name %q", contracts.TransportsEnv, i, td.Name) - } - seenNames[td.Name] = struct{}{} - if td.Mode == "" { - td.Mode = "hot" - } else if !isValidTransportMode(td.Mode) { - return nil, fmt.Errorf( - "invalid %s[%d]: mode %q must be hot or fallback", - contracts.TransportsEnv, - i, - td.Mode, - ) - } - config := make(map[string]any) - for k, v := range entry { - switch k { - case "name", "kind", "mode": - continue - default: - config[k] = v - } - } - if len(config) > 0 { - td.Config = config +func fillInputsFromEnv(execCtxData *ExecutionContextData) error { + if inputsStr := os.Getenv("BUBU_INPUTS"); inputsStr != "" { + if err := json.Unmarshal([]byte(inputsStr), &execCtxData.Inputs); err != nil { + return fmt.Errorf("failed to unmarshal BUBU_INPUTS: %w", err) } - transports = append(transports, td) } - return transports, nil + return nil } -func isValidTransportMode(mode string) bool { - switch mode { - case "hot", "fallback": - return true - default: - return false - } -} - -func applyConfigAndSecretOverrides(data *ExecutionContextData) { - for _, envPair := range os.Environ() { - key, value, found := strings.Cut(envPair, "=") - if !found { - continue - } - switch { - case strings.HasPrefix(key, contracts.ConfigPrefixEnv): - configKey := strings.TrimPrefix(key, contracts.ConfigPrefixEnv) - if configKey != "" { - data.Config[configKey] = value - } - case strings.HasPrefix(key, contracts.SecretPrefixEnv): - secretKey := strings.TrimPrefix(key, contracts.SecretPrefixEnv) - if secretKey == "" { - continue - } - if strings.HasSuffix(secretKey, "_NAME") { - continue - } - if strings.Contains(secretKey, "__") { - // Bucket-scoped export used for descriptor expansion; skip SDK exposure. - continue - } - data.Secrets[secretKey] = value +func fillConfigFromEnvJSON(execCtxData *ExecutionContextData) error { + if configStr := os.Getenv("BUBU_CONFIG"); configStr != "" { + if err := json.Unmarshal([]byte(configStr), &execCtxData.Config); err != nil { + return fmt.Errorf("failed to unmarshal BUBU_CONFIG: %w", err) } } + return nil } -func applyExecutionSettingsFromEnv(exec *ExecutionInfo) { - if timeout := parseDurationEnv(contracts.StepTimeoutEnv); timeout > 0 { - exec.StepTimeout = timeout +func fillExecutionInfoFromEnv(execCtxData *ExecutionContextData) { + execCtxData.Execution.Mode = os.Getenv("BUBU_EXECUTION_MODE") + if timeoutStr := os.Getenv("BUBU_STEP_TIMEOUT"); timeoutStr != "" { + if d, err := time.ParseDuration(timeoutStr); err == nil { + execCtxData.Execution.StepTimeout = d + } } - if maxInline, ok := parsePositiveIntEnv(contracts.MaxInlineSizeEnv); ok { - exec.MaxInlineSize = maxInline + if maxSizeStr := os.Getenv("BUBU_MAX_INLINE_SIZE"); maxSizeStr != "" { + if size, err := strconv.Atoi(maxSizeStr); err == nil { + execCtxData.Execution.MaxInlineSize = size + } } - if grpcPort, ok := parsePositiveIntEnv(contracts.GRPCPortEnv); ok { - exec.GRPCPort = grpcPort + if grpcPortStr := os.Getenv("BUBU_GRPC_PORT"); grpcPortStr != "" { + if port, err := strconv.Atoi(grpcPortStr); err == nil { + execCtxData.Execution.GRPCPort = port + } } } -func parseDurationEnv(key string) time.Duration { - if raw := strings.TrimSpace(os.Getenv(key)); raw != "" { - d, err := time.ParseDuration(raw) - if err != nil { - warnInvalidRuntimeEnv(key, raw, "a positive duration string such as 5s") - return 0 +func fillStorageConfigFromEnv(execCtxData *ExecutionContextData) { + if provider := os.Getenv("BUBU_STORAGE_PROVIDER"); provider != "" { + execCtxData.Storage = &StorageConfig{Provider: provider} + if timeoutStr := os.Getenv("BUBU_STORAGE_TIMEOUT"); timeoutStr != "" { + if d, err := time.ParseDuration(timeoutStr); err == nil { + execCtxData.Storage.Timeout = d + } } - if d <= 0 { - warnInvalidRuntimeEnv(key, raw, "a positive duration string such as 5s") - return 0 + if provider == "s3" { + execCtxData.Storage.S3 = &S3StorageConfig{ + Bucket: os.Getenv("BUBU_STORAGE_S3_BUCKET"), + Region: os.Getenv("BUBU_STORAGE_S3_REGION"), + Endpoint: os.Getenv("BUBU_STORAGE_S3_ENDPOINT"), + } } - return d } - return 0 } -func parsePositiveIntEnv(key string) (int, bool) { - if raw := strings.TrimSpace(os.Getenv(key)); raw != "" { - value, err := strconv.Atoi(raw) - if err != nil { - warnInvalidRuntimeEnv(key, raw, "a positive integer") - return 0, false +func applyOverridesFromEnv(execCtxData *ExecutionContextData) { + for _, env := range os.Environ() { + parts := strings.SplitN(env, "=", 2) + if len(parts) != 2 { + continue } - if value <= 0 { - warnInvalidRuntimeEnv(key, raw, "a positive integer") - return 0, false + key, value := parts[0], parts[1] + if strings.HasPrefix(key, "BUBU_CONFIG_") { + configKey := strings.TrimPrefix(key, "BUBU_CONFIG_") + execCtxData.Config[configKey] = value + } else if strings.HasPrefix(key, "BUBU_SECRET_") { + secretKey := strings.TrimPrefix(key, "BUBU_SECRET_") + execCtxData.Secrets[secretKey] = value } - return value, true - } - return 0, false -} - -func warnInvalidRuntimeEnv(key string, raw string, expected string) { - slog.Default().Warn("Ignoring invalid runtime env override", "env", key, "value", raw, "expected", expected) -} - -func loadStorageConfigFromEnv() *StorageConfig { - provider := strings.TrimSpace(os.Getenv(contracts.StorageProviderEnv)) - if provider == "" { - return nil } - cfg := &StorageConfig{Provider: provider} - - if timeout := parseDurationEnv(contracts.StorageTimeoutEnv); timeout > 0 { - cfg.Timeout = timeout - } - - if provider != "s3" { - return cfg - } - - cfg.S3 = &S3StorageConfig{ - Bucket: strings.TrimSpace(os.Getenv(contracts.StorageS3BucketEnv)), - Region: strings.TrimSpace(os.Getenv(contracts.StorageS3RegionEnv)), - Endpoint: strings.TrimSpace(os.Getenv(contracts.StorageS3EndpointEnv)), - } - - return cfg } // UnmarshalFromMap is a helper to convert a map[string]any to a struct @@ -342,12 +175,11 @@ func UnmarshalFromMap[T any](data map[string]any) (T, error) { // This enables the decoder to handle type conversions automatically, // for example, converting a string "123" to an int 123. WeaklyTypedInput: true, - ErrorUnused: strictUnmarshalFromMapEnabled(), Result: &target, DecodeHook: mapstructure.ComposeDecodeHookFunc( // Support parsing time.Duration from strings like "5s", "1m" func(from reflect.Type, to reflect.Type, data any) (any, error) { - if to == reflect.TypeFor[time.Duration]() { + if to == reflect.TypeOf(time.Duration(0)) { switch v := data.(type) { case string: d, err := time.ParseDuration(v) @@ -355,11 +187,10 @@ func UnmarshalFromMap[T any](data map[string]any) (T, error) { return nil, err } return d, nil - case time.Duration: - return v, nil - } - if isNumericDurationValue(data) { - return nil, fmt.Errorf("duration values must be strings like 5s, not %T", data) + case int64: + return time.Duration(v), nil + case float64: + return time.Duration(v), nil } } return data, nil @@ -377,27 +208,3 @@ func UnmarshalFromMap[T any](data map[string]any) (T, error) { } return target, nil } - -func strictUnmarshalFromMapEnabled() bool { - raw := strings.TrimSpace(os.Getenv(runtimeStrictUnmarshalEnv)) - if raw == "" { - return false - } - enabled, err := strconv.ParseBool(raw) - if err != nil { - warnInvalidRuntimeEnv(runtimeStrictUnmarshalEnv, raw, "a boolean (true/false)") - return false - } - return enabled -} - -func isNumericDurationValue(value any) bool { - switch value.(type) { - case int, int8, int16, int32, int64, - uint, uint8, uint16, uint32, uint64, uintptr, - float32, float64: - return true - default: - return false - } -} diff --git a/runtime/context_fuzz_test.go b/runtime/context_fuzz_test.go index e7b7007..f04b1d4 100644 --- a/runtime/context_fuzz_test.go +++ b/runtime/context_fuzz_test.go @@ -1,20 +1,5 @@ //go:build go1.18 - -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ +// +build go1.18 package runtime @@ -41,15 +26,15 @@ func FuzzUnmarshalFromMap(f *testing.F) { // Try to unmarshal into a generic struct type TestStruct struct { - Key string `mapstructure:"key"` - Number int `mapstructure:"number"` - Float float64 `mapstructure:"float"` - Bool bool `mapstructure:"bool"` - Array []int `mapstructure:"array"` - Nested map[string]any `mapstructure:"nested"` - Duration time.Duration `mapstructure:"duration"` - Timeout time.Duration `mapstructure:"timeout"` - Deeply map[string]any `mapstructure:"deeply"` + Key string `mapstructure:"key"` + Number int `mapstructure:"number"` + Float float64 `mapstructure:"float"` + Bool bool `mapstructure:"bool"` + Array []int `mapstructure:"array"` + Nested map[string]interface{} `mapstructure:"nested"` + Duration time.Duration `mapstructure:"duration"` + Timeout time.Duration `mapstructure:"timeout"` + Deeply map[string]interface{} `mapstructure:"deeply"` } // Should not panic, even with malformed data diff --git a/runtime/context_test.go b/runtime/context_test.go index 4491a1d..8286947 100644 --- a/runtime/context_test.go +++ b/runtime/context_test.go @@ -1,47 +1,15 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package runtime import ( - "bytes" - "log/slog" "os" "reflect" - "strings" "testing" "time" "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/core/contracts" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const sampleTransportConfig = `[ - { - "name":"rt", - "kind":"livekit", - "mode":"hot", - "livekit":{ - "room":"abc", - "participant":"def" - } - } -]` - func TestLoadExecutionContextData(t *testing.T) { now := time.Now().UTC() nowStr := now.Format(time.RFC3339Nano) @@ -55,18 +23,19 @@ func TestLoadExecutionContextData(t *testing.T) { { name: "complete context", envVars: map[string]string{ - contracts.TriggerDataEnv: `{"key":"value","number":123}`, - contracts.StepConfigEnv: `{"key1":"value1","key2":"value2"}`, - contracts.SecretPrefixEnv + "API": "secret123", - contracts.StoryNameEnv: "test-story", - contracts.StoryRunIDEnv: "run-123", - contracts.StepNameEnv: "test-step", - contracts.StepRunNameEnv: "step-run-123", - contracts.StartedAtEnv: nowStr, + "BUBU_INPUTS": `{"key":"value","number":123}`, + "BUBU_CONFIG_KEY1": "value1", + "BUBU_CONFIG_KEY2": "value2", + "BUBU_SECRET_API": "secret123", + "BUBU_STORY_NAME": "test-story", + "BUBU_STORYRUN_ID": "run-123", + "BUBU_STEP_NAME": "test-step", + "BUBU_STEPRUN_NAME": "step-run-123", + "BUBU_STARTED_AT": nowStr, }, want: &ExecutionContextData{ Inputs: map[string]any{"key": "value", "number": float64(123)}, // JSON unmarshals numbers to float64 - Config: map[string]any{"key1": "value1", "key2": "value2"}, + Config: map[string]any{"KEY1": "value1", "KEY2": "value2"}, Secrets: map[string]string{"API": "secret123"}, StoryInfo: engram.StoryInfo{ StoryName: "test-story", @@ -81,7 +50,7 @@ func TestLoadExecutionContextData(t *testing.T) { { name: "minimal context", envVars: map[string]string{ - contracts.StoryNameEnv: "minimal-story", + "BUBU_STORY_NAME": "minimal-story", }, want: &ExecutionContextData{ Inputs: make(map[string]any), @@ -93,36 +62,10 @@ func TestLoadExecutionContextData(t *testing.T) { }, wantErr: false, }, - { - name: "transports parsed", - envVars: map[string]string{ - contracts.TransportsEnv: sampleTransportConfig, - }, - want: &ExecutionContextData{ - Inputs: make(map[string]any), - Config: make(map[string]any), - Secrets: make(map[string]string), - StoryInfo: engram.StoryInfo{}, - Transports: []engram.TransportDescriptor{ - { - Name: "rt", - Kind: "livekit", - Mode: "hot", - Config: map[string]any{ - "livekit": map[string]any{ - "participant": "def", - "room": "abc", - }, - }, - }, - }, - }, - wantErr: false, - }, { name: "invalid JSON inputs", envVars: map[string]string{ - contracts.TriggerDataEnv: `{invalid json}`, + "BUBU_INPUTS": `{invalid json}`, }, wantErr: true, }, @@ -169,8 +112,8 @@ func TestLoadExecutionContextData(t *testing.T) { if got == nil { t.Fatal("LoadExecutionContextData() returned nil, want non-nil") } - // Ignore the StartedAt field for cases where it wasn't explicitly provided. - if tt.name == "minimal context" || tt.name == "empty environment" || tt.name == "transports parsed" { + // Ignore the StartedAt field for minimal and empty context as it's time-sensitive + if tt.name == "minimal context" || tt.name == "empty environment" { tt.want.StartedAt = got.StartedAt } @@ -182,193 +125,6 @@ func TestLoadExecutionContextData(t *testing.T) { } } -func TestLoadExecutionContextData_AllowsNullInputs(t *testing.T) { - t.Setenv(contracts.TriggerDataEnv, "null") - data, err := LoadExecutionContextData() - if err != nil { - t.Fatalf("LoadExecutionContextData() error = %v", err) - } - if data.Inputs == nil { - t.Fatalf("expected Inputs to be initialized, got nil") - } - if len(data.Inputs) != 0 { - t.Fatalf("expected Inputs to be empty map, got %v", data.Inputs) - } -} - -func TestLoadExecutionContextData_LoadsCELContext(t *testing.T) { - t.Setenv(contracts.TemplateContextEnv, `{"steps":{"a":{"ok":true}},"inputs":{"name":"demo"}}`) - - data, err := LoadExecutionContextData() - if err != nil { - t.Fatalf("LoadExecutionContextData() error = %v", err) - } - if data.CELContext == nil { - t.Fatal("expected CELContext to be populated") - } - steps, ok := data.CELContext["steps"].(map[string]any) - if !ok { - t.Fatalf("expected steps map in CELContext, got %#v", data.CELContext["steps"]) - } - if _, ok := steps["a"]; !ok { - t.Fatalf("expected step entry in CELContext, got %#v", steps) - } -} - -func TestLoadTransportsFromEnvDefaultsModeToHot(t *testing.T) { - t.Setenv(contracts.TransportsEnv, `[{"name":"rt","kind":"livekit"}]`) - - transports, err := loadTransportsFromEnv() - if err != nil { - t.Fatalf("loadTransportsFromEnv() error = %v", err) - } - if len(transports) != 1 { - t.Fatalf("expected 1 transport, got %d", len(transports)) - } - if transports[0].Mode != "hot" { //nolint:goconst - t.Fatalf("expected default mode hot, got %q", transports[0].Mode) - } -} - -func TestLoadTransportsFromEnvRejectsMissingName(t *testing.T) { - t.Setenv(contracts.TransportsEnv, `[{"kind":"livekit","mode":"hot"}]`) - - _, err := loadTransportsFromEnv() - if err == nil { - t.Fatal("expected missing name to be rejected") - } - if got := err.Error(); got != "invalid BUBU_TRANSPORTS[0]: name is required" { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestLoadTransportsFromEnvRejectsMissingKind(t *testing.T) { - t.Setenv(contracts.TransportsEnv, `[{"name":"rt","mode":"hot"}]`) - - _, err := loadTransportsFromEnv() - if err == nil { - t.Fatal("expected missing kind to be rejected") - } - if got := err.Error(); got != "invalid BUBU_TRANSPORTS[0]: kind is required" { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestLoadTransportsFromEnvRejectsInvalidMode(t *testing.T) { - t.Setenv(contracts.TransportsEnv, `[{"name":"rt","kind":"livekit","mode":"bi"}]`) - - _, err := loadTransportsFromEnv() - if err == nil { - t.Fatal("expected invalid mode to be rejected") - } - if got := err.Error(); got != `invalid BUBU_TRANSPORTS[0]: mode "bi" must be hot or fallback` { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestLoadTransportsFromEnvRejectsDuplicateNames(t *testing.T) { - t.Setenv(contracts.TransportsEnv, `[ - {"name":"rt","kind":"livekit","mode":"hot"}, - {"name":"rt","kind":"webhook","mode":"fallback"} - ]`) - - _, err := loadTransportsFromEnv() - if err == nil { - t.Fatal("expected duplicate transport names to be rejected") - } - if got := err.Error(); got != `invalid BUBU_TRANSPORTS[1]: duplicate transport name "rt"` { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestApplyExecutionSettingsFromEnvWarnsOnInvalidValues(t *testing.T) { - t.Setenv(contracts.StepTimeoutEnv, "not-a-duration") - t.Setenv(contracts.MaxInlineSizeEnv, "-1") - t.Setenv(contracts.GRPCPortEnv, "abc") - - var buf bytes.Buffer - prev := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - defer slog.SetDefault(prev) - - exec := ExecutionInfo{ - StepTimeout: 5 * time.Second, - MaxInlineSize: 256, - GRPCPort: 8443, - } - - applyExecutionSettingsFromEnv(&exec) - - if exec.StepTimeout != 5*time.Second { - t.Fatalf("expected invalid step timeout override to be ignored, got %s", exec.StepTimeout) - } - if exec.MaxInlineSize != 256 { - t.Fatalf("expected invalid max inline size override to be ignored, got %d", exec.MaxInlineSize) - } - if exec.GRPCPort != 8443 { - t.Fatalf("expected invalid grpc port override to be ignored, got %d", exec.GRPCPort) - } - - output := buf.String() - for _, key := range []string{contracts.StepTimeoutEnv, contracts.MaxInlineSizeEnv, contracts.GRPCPortEnv} { - if !strings.Contains(output, key) { - t.Fatalf("expected warning log for %s, got %s", key, output) - } - } -} - -func TestLoadStorageConfigFromEnvWarnsOnInvalidTimeout(t *testing.T) { - t.Setenv(contracts.StorageProviderEnv, "s3") - t.Setenv(contracts.StorageTimeoutEnv, "0") - - var buf bytes.Buffer - prev := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - defer slog.SetDefault(prev) - - cfg := loadStorageConfigFromEnv() - if cfg == nil { - t.Fatal("expected storage config to be created") - } - if cfg.Timeout != 0 { - t.Fatalf("expected invalid storage timeout override to be ignored, got %s", cfg.Timeout) - } - if !strings.Contains(buf.String(), contracts.StorageTimeoutEnv) { - t.Fatalf("expected warning log for %s, got %s", contracts.StorageTimeoutEnv, buf.String()) - } -} - -func TestLoadStorageConfigFromEnvTrimsValues(t *testing.T) { - t.Setenv(contracts.StorageProviderEnv, " s3 ") - t.Setenv(contracts.StorageS3BucketEnv, " bucket-a ") - t.Setenv(contracts.StorageS3RegionEnv, " us-east-1 ") - t.Setenv(contracts.StorageS3EndpointEnv, " https://example.invalid ") - - cfg := loadStorageConfigFromEnv() - if cfg == nil { - t.Fatal("expected storage config to be created") - } - if cfg.Provider != "s3" { - t.Fatalf("expected trimmed provider, got %q", cfg.Provider) - } - if cfg.S3 == nil { - t.Fatal("expected S3 config to be created") - } - if cfg.S3.Bucket != "bucket-a" || cfg.S3.Region != "us-east-1" || cfg.S3.Endpoint != "https://example.invalid" { - t.Fatalf("expected trimmed S3 fields, got %+v", cfg.S3) - } -} - -func TestLoadStorageConfigFromEnvIgnoresWhitespaceProvider(t *testing.T) { - t.Setenv(contracts.StorageProviderEnv, " ") - - if cfg := loadStorageConfigFromEnv(); cfg != nil { - t.Fatalf("expected whitespace provider to be ignored, got %+v", cfg) - } -} - func TestUnmarshalFromMap(t *testing.T) { type TestStruct struct { StringField string `mapstructure:"stringField"` @@ -493,42 +249,6 @@ func TestUnmarshalFromMap_NestedStructs(t *testing.T) { } } -func TestUnmarshalFromMap_StrictRejectsUnusedFields(t *testing.T) { - t.Setenv(runtimeStrictUnmarshalEnv, "true") - - type C struct { - Name string `mapstructure:"name"` - } - - _, err := UnmarshalFromMap[C](map[string]any{ - "name": "ok", - "extra": "unexpected", - }) - if err == nil { - t.Fatal("expected strict unmarshal mode to reject unused fields") - } - if !strings.Contains(err.Error(), "extra") { - t.Fatalf("expected error to mention unused field, got %v", err) - } -} - -func TestStrictUnmarshalFromMapEnabledWarnsOnInvalidEnv(t *testing.T) { - t.Setenv(runtimeStrictUnmarshalEnv, "definitely-not-bool") - - var buf bytes.Buffer - prev := slog.Default() - logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) - slog.SetDefault(logger) - defer slog.SetDefault(prev) - - if strictUnmarshalFromMapEnabled() { - t.Fatal("expected invalid strict unmarshal env value to fall back to disabled") - } - if got := buf.String(); got == "" || !strings.Contains(got, runtimeStrictUnmarshalEnv) { - t.Fatalf("expected warning log for invalid strict unmarshal env, got %q", got) - } -} - func TestUnmarshalFromMap_Duration(t *testing.T) { type C struct { Timeout time.Duration `mapstructure:"timeout"` @@ -543,37 +263,13 @@ func TestUnmarshalFromMap_Duration(t *testing.T) { t.Errorf("Timeout = %v, want 150ms", cfg.Timeout) } - // Numeric durations are rejected to force explicit units. - _, err = UnmarshalFromMap[C](map[string]any{"timeout": int64(time.Second)}) - if err == nil { - t.Fatalf("expected error for numeric duration, got nil") - } - if !strings.Contains(err.Error(), "duration values must be strings like 5s") { - t.Fatalf("unexpected numeric duration error: %v", err) - } - - _, err = UnmarshalFromMap[C](map[string]any{"timeout": float64(time.Second)}) - if err == nil { - t.Fatalf("expected error for float duration, got nil") - } - if !strings.Contains(err.Error(), "duration values must be strings like 5s") { - t.Fatalf("unexpected float duration error: %v", err) - } - - _, err = UnmarshalFromMap[C](map[string]any{"timeout": uint(1)}) - if err == nil { - t.Fatalf("expected error for unsigned numeric duration, got nil") - } - if !strings.Contains(err.Error(), "duration values must be strings like 5s") { - t.Fatalf("unexpected unsigned numeric duration error: %v", err) - } - - _, err = UnmarshalFromMap[C](map[string]any{"timeout": float32(1)}) - if err == nil { - t.Fatalf("expected error for float32 duration, got nil") + // Integer nanoseconds + cfg, err = UnmarshalFromMap[C](map[string]any{"timeout": int64(time.Second)}) + if err != nil { + t.Fatalf("UnmarshalFromMap duration int64: %v", err) } - if !strings.Contains(err.Error(), "duration values must be strings like 5s") { - t.Fatalf("unexpected float32 duration error: %v", err) + if cfg.Timeout != time.Second { + t.Errorf("Timeout = %v, want 1s", cfg.Timeout) } // Invalid string should error @@ -583,24 +279,10 @@ func TestUnmarshalFromMap_Duration(t *testing.T) { } } -func TestUnmarshalFromMap_NilDataReturnsZeroValue(t *testing.T) { - type C struct { - Name string `mapstructure:"name"` - } - - cfg, err := UnmarshalFromMap[C](nil) - if err != nil { - t.Fatalf("expected nil data to decode to zero value, got %v", err) - } - if cfg != (C{}) { - t.Fatalf("expected zero value result, got %+v", cfg) - } -} - // Helper function to clean environment variables func cleanEnv(t *testing.T) { t.Helper() - prefixes := []string{contracts.PrefixEnv} + prefixes := []string{"BUBU_"} for _, prefix := range prefixes { for _, env := range os.Environ() { if len(env) > len(prefix) && env[:len(prefix)] == prefix { @@ -626,12 +308,12 @@ func indexOf(s, substr string) int { func TestExecutionContextData_StartedAt(t *testing.T) { // Test with valid RFC3339 time validTime := time.Now().Format(time.RFC3339) - err := os.Setenv(contracts.StartedAtEnv, validTime) + err := os.Setenv("BUBU_STARTED_AT", validTime) if err != nil { t.Fatalf("Setenv() error = %v", err) } defer func() { - err = os.Unsetenv(contracts.StartedAtEnv) + err = os.Unsetenv("BUBU_STARTED_AT") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } diff --git a/schema_validation.go b/schema_validation.go deleted file mode 100644 index bc150e0..0000000 --- a/schema_validation.go +++ /dev/null @@ -1,759 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "log/slog" - "os" - "strings" - "time" - "unicode" - - catalogv1alpha1 "github.com/bubustack/bobrapet/api/catalog/v1alpha1" - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - bubuv1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/storage" - "github.com/bubustack/bubu-sdk-go/k8s" - sdkenv "github.com/bubustack/bubu-sdk-go/pkg/env" - "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/bubustack/core/contracts" - "github.com/xeipuuv/gojsonschema" - apierrors "k8s.io/apimachinery/pkg/api/errors" - k8sruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type k8sGetter interface { - Get(context.Context, types.NamespacedName, client.Object, ...client.GetOption) error -} - -const ( - defaultSchemaLookupTimeout = 30 * time.Second - schemaAllowRuntimeRefsKeyword = "x-bubu-allow-runtime-refs" - schemaAllowTemplateStringsKeyword = "x-bubu-allow-template-strings" - templateStringPattern = `^\s*\$?\{\{[\s\S]+\}\}\s*$` -) - -type schemaNormalizationOptions struct { - allowRuntimeRefs bool - allowTemplateStrings bool -} - -func validateBatchInputs( - ctx context.Context, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - inputs map[string]any, -) error { - logger := LoggerFromContext(ctx) - schema, schemaName, ok, err := resolveInputSchema(ctx, k8sClient, execCtxData, logger) - if err != nil { - return NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - fmt.Sprintf("input schema resolution failed: %v", err), - WithStructuredErrorCause(err), - WithStructuredErrorRetryable(true), - WithStructuredErrorExitClass(enums.ExitClassRetry), - ) - } - if !ok { - return nil - } - inputBytes, err := json.Marshal(inputs) - if err != nil { - return fmt.Errorf("failed to marshal inputs for schema validation: %w", err) - } - if err := validateJSONInputsBytes(inputBytes, schema, schemaName); err != nil { - return NewStructuredError( - runsv1alpha1.StructuredErrorTypeValidation, - fmt.Sprintf("input schema validation failed: %v", err), - WithStructuredErrorCause(err), - WithStructuredErrorExitClass(enums.ExitClassTerminal), - ) - } - return nil -} - -func validateBatchOutputs( - ctx context.Context, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - outputBytes []byte, -) error { - logger := LoggerFromContext(ctx) - schema, schemaName, ok, err := resolveOutputSchema(ctx, k8sClient, execCtxData, logger) - if err != nil { - return NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - fmt.Sprintf("output schema resolution failed: %v", err), - WithStructuredErrorCause(err), - WithStructuredErrorRetryable(true), - WithStructuredErrorExitClass(enums.ExitClassRetry), - ) - } - if !ok { - return nil - } - if err := validateJSONOutputBytes(outputBytes, schema, schemaName); err != nil { - return NewStructuredError( - runsv1alpha1.StructuredErrorTypeValidation, - fmt.Sprintf("output schema validation failed: %v", err), - WithStructuredErrorCause(err), - WithStructuredErrorExitClass(enums.ExitClassTerminal), - ) - } - return nil -} - -func resolveInputSchema( - ctx context.Context, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - logger *slog.Logger, -) (*k8sruntime.RawExtension, string, bool, error) { - template, ok, err := fetchEngramTemplate(ctx, k8sClient, execCtxData, logger) - if err != nil { - return nil, "", false, err - } - if !ok || template == nil { - return nil, "", false, nil - } - if template.Spec.InputSchema == nil || len(template.Spec.InputSchema.Raw) == 0 { - return nil, "", false, nil - } - schemaName := fmt.Sprintf("EngramTemplate %s inputs", template.Name) - return template.Spec.InputSchema, schemaName, true, nil -} - -func resolveOutputSchema( - ctx context.Context, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - logger *slog.Logger, -) (*k8sruntime.RawExtension, string, bool, error) { - template, ok, err := fetchEngramTemplate(ctx, k8sClient, execCtxData, logger) - if err != nil { - return nil, "", false, err - } - if !ok || template == nil { - return nil, "", false, nil - } - if template.Spec.OutputSchema == nil || len(template.Spec.OutputSchema.Raw) == 0 { - return nil, "", false, nil - } - schemaName := fmt.Sprintf("EngramTemplate %s output", template.Name) - return template.Spec.OutputSchema, schemaName, true, nil -} - -func fetchEngramTemplate( - ctx context.Context, - k8sClient K8sClient, - execCtxData *runtime.ExecutionContextData, - logger *slog.Logger, -) (*catalogv1alpha1.EngramTemplate, bool, error) { - getter, ok := k8sClient.(k8sGetter) - if !ok { - logger.Debug("Schema validation skipped: k8s client does not support Get") - return nil, false, nil - } - namespace := resolveSchemaNamespace(execCtxData) - if namespace == "" { - logger.Debug("Schema validation skipped: step run namespace unavailable") - return nil, false, nil - } - lookupCtx, cancel := withSchemaLookupTimeout(ctx) - defer cancel() - engramName, err := resolveEngramName(lookupCtx, getter, execCtxData, namespace) - if err != nil { - return nil, false, fmt.Errorf("resolve engram name: %w", err) - } - if engramName == "" { - logger.Debug("Schema validation skipped: engram name unavailable") - return nil, false, nil - } - engram := &bubuv1alpha1.Engram{} - if err := getter.Get(lookupCtx, types.NamespacedName{Name: engramName, Namespace: namespace}, engram); err != nil { - if apierrors.IsForbidden(err) { - if logger != nil { - logger.Info( - "Schema validation skipped: Engram lookup forbidden; controller-side validation remains authoritative", - "engram", engramName, - "namespace", namespace, - ) - } - return nil, false, nil - } - return nil, false, fmt.Errorf("fetch engram: %w", err) - } - templateName := strings.TrimSpace(engram.Spec.TemplateRef.Name) - if templateName == "" { - logger.Debug("Schema validation skipped: engram template name missing", "engram", engramName) - return nil, false, nil - } - template := &catalogv1alpha1.EngramTemplate{} - if err := getter.Get(lookupCtx, types.NamespacedName{Name: templateName}, template); err != nil { - if apierrors.IsForbidden(err) { - if logger != nil { - logger.Info( - "Schema validation skipped: EngramTemplate lookup forbidden; controller-side validation remains authoritative", - "engram", engramName, - "template", templateName, - ) - } - return nil, false, nil - } - return nil, false, fmt.Errorf("fetch engram template: %w", err) - } - return template, true, nil -} - -func withSchemaLookupTimeout(ctx context.Context) (context.Context, context.CancelFunc) { - if ctx == nil { - ctx = context.Background() - } - timeout := sdkenv.GetDuration(contracts.K8sOperationTimeoutEnv, defaultSchemaLookupTimeout) - return context.WithTimeout(ctx, timeout) -} - -func resolveSchemaNamespace(execCtxData *runtime.ExecutionContextData) string { - if execCtxData != nil { - if ns := strings.TrimSpace(execCtxData.StoryInfo.StepRunNamespace); ns != "" { - return ns - } - } - return strings.TrimSpace(k8s.ResolvePodNamespace()) -} - -func resolveEngramName( - ctx context.Context, - getter k8sGetter, - execCtxData *runtime.ExecutionContextData, - namespace string, -) (string, error) { - if name := strings.TrimSpace(os.Getenv(contracts.EngramNameEnv)); name != "" { - return name, nil - } - if execCtxData == nil { - return "", nil - } - stepRunName := strings.TrimSpace(execCtxData.StoryInfo.StepRunID) - if stepRunName == "" { - return "", nil - } - stepRun := &runsv1alpha1.StepRun{} - if err := getter.Get(ctx, types.NamespacedName{Name: stepRunName, Namespace: namespace}, stepRun); err != nil { - return "", err - } - if stepRun.Spec.EngramRef == nil { - return "", nil - } - return strings.TrimSpace(stepRun.Spec.EngramRef.Name), nil -} - -func validateJSONInputsBytes(input []byte, schema *k8sruntime.RawExtension, schemaName string) error { - if schema == nil || len(schema.Raw) == 0 { - return nil - } - trimmed := trimLeadingSpace(input) - if len(trimmed) == 0 { - trimmed = []byte("{}") - } - return validateJSONAgainstSchema(trimmed, schema.Raw, schemaName) -} - -func validateJSONOutputBytes(output []byte, schema *k8sruntime.RawExtension, schemaName string) error { - if schema == nil || len(schema.Raw) == 0 { - return nil - } - trimmed := trimLeadingSpace(output) - if len(trimmed) == 0 { - return fmt.Errorf("%s output is empty but a schema is defined", schemaName) - } - return validateJSONAgainstSchema(trimmed, schema.Raw, schemaName) -} - -func trimLeadingSpace(raw []byte) []byte { - return bytes.TrimLeftFunc(raw, unicode.IsSpace) -} - -func validateJSONAgainstSchema(doc []byte, schema []byte, schemaName string) error { - normalizedSchema, err := normalizeSchemaBytes(schema) - if err != nil { - return fmt.Errorf("error validating against %s schema: failed to normalize schema: %w", schemaName, err) - } - schemaLoader := gojsonschema.NewStringLoader(string(normalizedSchema)) - documentLoader := gojsonschema.NewStringLoader(string(doc)) - result, err := gojsonschema.Validate(schemaLoader, documentLoader) - if err != nil { - return fmt.Errorf("error validating against %s schema: %w", schemaName, err) - } - if !result.Valid() { - var errs []string - for _, desc := range result.Errors() { - errs = append(errs, desc.String()) - } - return fmt.Errorf("object is invalid against %s schema: %v", schemaName, errs) - } - return nil -} - -func normalizeSchemaBytes(schema []byte) ([]byte, error) { - if len(schema) == 0 { - return schema, nil - } - var root any - if err := json.Unmarshal(schema, &root); err != nil { - return nil, err - } - if err := rejectExternalSchemaRefs(root); err != nil { - return nil, err - } - normalized := normalizeSchemaNodeWithOptions(root, schemaNormalizationOptions{}) - out, err := json.Marshal(normalized) - if err != nil { - return nil, err - } - return out, nil -} - -func rejectExternalSchemaRefs(node any) error { - switch typed := node.(type) { - case map[string]any: - for key, value := range typed { - if key == "$ref" { - ref, ok := value.(string) - if ok { - ref = strings.TrimSpace(ref) - if ref != "" && !strings.HasPrefix(ref, "#") { - return fmt.Errorf("external schema reference %q is not allowed", ref) - } - } - } - if err := rejectExternalSchemaRefs(value); err != nil { - return err - } - } - case []any: - for _, item := range typed { - if err := rejectExternalSchemaRefs(item); err != nil { - return err - } - } - } - return nil -} - -func normalizeSchemaNodeWithOptions(node any, inherited schemaNormalizationOptions) any { - switch typed := node.(type) { - case map[string]any: - options := schemaNormalizationOptionsForNode(typed, inherited) - normalized := normalizeObjectSchema(typed, options) - return allowRuntimeAlternatives(normalized, options) - case []any: - for i := range typed { - typed[i] = normalizeSchemaNodeWithOptions(typed[i], inherited) - } - return typed - default: - return node - } -} - -func schemaNormalizationOptionsForNode( - schema map[string]any, - inherited schemaNormalizationOptions, -) schemaNormalizationOptions { - options := inherited - if raw, ok := schema[schemaAllowRuntimeRefsKeyword]; ok { - if enabled, ok := raw.(bool); ok { - options.allowRuntimeRefs = enabled - } - } - if raw, ok := schema[schemaAllowTemplateStringsKeyword]; ok { - if enabled, ok := raw.(bool); ok { - options.allowTemplateStrings = enabled - } - } - return options -} - -func normalizeObjectSchema(obj map[string]any, options schemaNormalizationOptions) map[string]any { - requiredSet := liftInlineRequiredFlags(obj, options) - mergeRequiredSet(obj, requiredSet) - normalizeNestedSchemaLocations(obj, options) - return obj -} - -func allowRuntimeAlternatives(schema map[string]any, options schemaNormalizationOptions) any { //nolint:gocyclo - if schema == nil || len(schema) == 0 { //nolint:staticcheck - return schema - } - if !options.allowRuntimeRefs && !options.allowTemplateStrings { - return schema - } - if isImplicitRefSchema(schema) { - return schema - } - if anyOf, ok := schema["anyOf"].([]any); ok { - if options.allowRuntimeRefs && !schemaSliceHasImplicitRef(anyOf) { - schema["anyOf"] = append(anyOf, storageRefSchema(), configMapRefSchema(), secretRefSchema()) - } - if options.allowTemplateStrings && !schemaSliceHasTemplateString(schema["anyOf"].([]any)) { - schema["anyOf"] = append(schema["anyOf"].([]any), templateStringSchema()) - } - return schema - } - if oneOf, ok := schema["oneOf"].([]any); ok { - if options.allowRuntimeRefs && !schemaSliceHasImplicitRef(oneOf) { - schema["oneOf"] = append(oneOf, storageRefSchema(), configMapRefSchema(), secretRefSchema()) - } - if options.allowTemplateStrings && !schemaSliceHasTemplateString(schema["oneOf"].([]any)) { - schema["oneOf"] = append(schema["oneOf"].([]any), templateStringSchema()) - } - return schema - } - - alternatives := []any{schema} - if options.allowRuntimeRefs { - alternatives = append(alternatives, storageRefSchema(), configMapRefSchema(), secretRefSchema()) - } - if options.allowTemplateStrings { - alternatives = append(alternatives, templateStringSchema()) - } - wrapped := map[string]any{"anyOf": alternatives} - if title, ok := schema["title"]; ok { - wrapped["title"] = title - } - if desc, ok := schema["description"]; ok { - wrapped["description"] = desc - } - return wrapped -} - -func schemaSliceHasImplicitRef(schemas []any) bool { - for _, entry := range schemas { - if schemaMap, ok := entry.(map[string]any); ok && schemaHasImplicitRefAlternative(schemaMap) { - return true - } - } - return false -} - -func schemaSliceHasTemplateString(schemas []any) bool { - for _, entry := range schemas { - if schemaMap, ok := entry.(map[string]any); ok && schemaHasTemplateStringAlternative(schemaMap) { - return true - } - } - return false -} - -func schemaHasImplicitRefAlternative(schema map[string]any) bool { - if isImplicitRefSchema(schema) { - return true - } - return schemaContainsAlternative(schema, schemaHasImplicitRefAlternative) -} - -func schemaHasTemplateStringAlternative(schema map[string]any) bool { - if isTemplateStringSchema(schema) { - return true - } - return schemaContainsAlternative(schema, schemaHasTemplateStringAlternative) -} - -func schemaContainsAlternative(schema map[string]any, predicate func(map[string]any) bool) bool { - for _, key := range []string{"anyOf", "oneOf", "allOf"} { - raw, ok := schema[key].([]any) - if !ok { - continue - } - for _, candidate := range raw { - child, ok := candidate.(map[string]any) - if ok && predicate(child) { - return true - } - } - } - return false -} - -func isImplicitRefSchema(schema map[string]any) bool { - if isStorageRefSchema(schema) || isConfigMapRefSchema(schema) || isSecretRefSchema(schema) { - return true - } - return false -} - -func isStorageRefSchema(schema map[string]any) bool { - props, ok := schema["properties"].(map[string]any) - if !ok { - return false - } - _, hasRef := props[storage.StorageRefKey] - return hasRef -} - -func isConfigMapRefSchema(schema map[string]any) bool { - props, ok := schema["properties"].(map[string]any) - if !ok { - return false - } - _, hasRef := props["$bubuConfigMapRef"] - return hasRef -} - -func isSecretRefSchema(schema map[string]any) bool { - props, ok := schema["properties"].(map[string]any) - if !ok { - return false - } - _, hasRef := props["$bubuSecretRef"] - return hasRef -} - -func isTemplateStringSchema(schema map[string]any) bool { - if schema["type"] != "string" { - return false - } - pattern, hasPattern := schema["pattern"].(string) - return hasPattern && pattern == templateStringPattern -} - -func storageRefSchema() map[string]any { - return map[string]any{ - "type": "object", - "additionalProperties": false, - "properties": map[string]any{ - storage.StorageRefKey: map[string]any{"type": "string"}, - storage.StoragePathKey: map[string]any{"type": "string"}, - storage.StorageContentTypeKey: map[string]any{"type": "string"}, - storage.StorageSchemaKey: map[string]any{"type": "string"}, - storage.StorageSchemaVersionKey: map[string]any{"type": "string"}, - }, - "required": []any{storage.StorageRefKey}, - } -} - -func configMapRefSchema() map[string]any { - return map[string]any{ - "type": "object", - "additionalProperties": false, - "properties": map[string]any{ - "$bubuConfigMapRef": map[string]any{ - "anyOf": []any{ - map[string]any{"type": "string"}, - map[string]any{ - "type": "object", - "additionalProperties": false, - "properties": map[string]any{ - "name": map[string]any{"type": "string"}, - "key": map[string]any{"type": "string"}, - "namespace": map[string]any{"type": "string"}, - "format": map[string]any{ - "type": "string", - "enum": []any{"auto", "json", "raw"}, - }, - }, - "required": []any{"name", "key"}, - }, - }, - }, - }, - "required": []any{"$bubuConfigMapRef"}, - } -} - -func secretRefSchema() map[string]any { - return map[string]any{ - "type": "object", - "additionalProperties": false, - "properties": map[string]any{ - "$bubuSecretRef": map[string]any{ - "anyOf": []any{ - map[string]any{"type": "string"}, - map[string]any{ - "type": "object", - "additionalProperties": false, - "properties": map[string]any{ - "name": map[string]any{"type": "string"}, - "key": map[string]any{"type": "string"}, - "namespace": map[string]any{"type": "string"}, - "format": map[string]any{ - "type": "string", - "enum": []any{"auto", "json", "raw"}, - }, - }, - "required": []any{"name", "key"}, - }, - }, - }, - }, - "required": []any{"$bubuSecretRef"}, - } -} - -func templateStringSchema() map[string]any { - return map[string]any{ - "type": "string", - "pattern": templateStringPattern, - } -} - -func liftInlineRequiredFlags(obj map[string]any, options schemaNormalizationOptions) map[string]struct{} { - props, hasProps := obj["properties"].(map[string]any) - if !hasProps { - return nil - } - - requiredSet := map[string]struct{}{} - for propName, rawChild := range props { - cleaned := stripBooleanRequired(rawChild, propName, requiredSet) - props[propName] = normalizeSchemaNodeWithOptions(cleaned, options) - } - if len(requiredSet) == 0 { - return nil - } - return requiredSet -} - -func stripBooleanRequired(node any, propName string, requiredSet map[string]struct{}) any { - childMap, ok := node.(map[string]any) - if !ok { - return node - } - if raw, has := childMap["required"]; has { - if b, ok := raw.(bool); ok { - if b { - requiredSet[propName] = struct{}{} - } - delete(childMap, "required") - } - } - return childMap -} - -func mergeRequiredSet(obj map[string]any, requiredSet map[string]struct{}) { - if len(requiredSet) == 0 { - return - } - - existingList := extractExistingRequired(obj) - seen := make(map[string]struct{}, len(existingList)) - for _, name := range existingList { - seen[name] = struct{}{} - } - - for name := range requiredSet { - if _, already := seen[name]; !already { - existingList = append(existingList, name) - } - } - - out := make([]any, 0, len(existingList)) - for _, name := range existingList { - out = append(out, name) - } - obj["required"] = out -} - -func extractExistingRequired(obj map[string]any) []string { - raw, has := obj["required"] - if !has { - return nil - } - - switch typed := raw.(type) { - case []any: - var result []string - for _, v := range typed { - if s, ok := v.(string); ok { - result = append(result, s) - } - } - return result - case []string: - return append([]string{}, typed...) - default: - return nil - } -} - -func normalizeNestedSchemaLocations(obj map[string]any, options schemaNormalizationOptions) { - normalizeItemsNode(obj, options) - normalizeSingleSchemaField(obj, "additionalProperties", options) - normalizeMapOfSchemas(obj, "patternProperties", options) - normalizeSchemaSlice(obj, "allOf", options) - normalizeSchemaSlice(obj, "anyOf", options) - normalizeSchemaSlice(obj, "oneOf", options) - normalizeMapOfSchemas(obj, "definitions", options) - normalizeMapOfSchemas(obj, "$defs", options) - normalizeSingleSchemaField(obj, "not", options) -} - -func normalizeItemsNode(obj map[string]any, options schemaNormalizationOptions) { - items, has := obj["items"] - if !has { - return - } - switch typed := items.(type) { - case map[string]any, []any: - obj["items"] = normalizeSchemaNodeWithOptions(typed, options) - } -} - -func normalizeSingleSchemaField(obj map[string]any, key string, options schemaNormalizationOptions) { - if raw, has := obj[key]; has { - if schemaMap, ok := raw.(map[string]any); ok { - obj[key] = normalizeSchemaNodeWithOptions(schemaMap, options) - } - } -} - -func normalizeMapOfSchemas(obj map[string]any, key string, options schemaNormalizationOptions) { - raw, has := obj[key].(map[string]any) - if !has { - return - } - for k, v := range raw { - raw[k] = normalizeSchemaNodeWithOptions(v, options) - } -} - -func normalizeSchemaSlice(obj map[string]any, key string, options schemaNormalizationOptions) { - raw, has := obj[key].([]any) - if !has { - return - } - childOptions := options - if key == "oneOf" { - // Preserve oneOf exclusivity: inherited runtime/template broadening is - // applied once at the parent oneOf node, not injected into every branch. - childOptions.allowRuntimeRefs = false - childOptions.allowTemplateStrings = false - } - for i := range raw { - raw[i] = normalizeSchemaNodeWithOptions(raw[i], childOptions) - } - obj[key] = raw -} diff --git a/schema_validation_test.go b/schema_validation_test.go deleted file mode 100644 index f2cd5d1..0000000 --- a/schema_validation_test.go +++ /dev/null @@ -1,661 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "context" - "encoding/json" - "errors" - "fmt" - "testing" - "time" - - catalogv1alpha1 "github.com/bubustack/bobrapet/api/catalog/v1alpha1" - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - bubuv1alpha1 "github.com/bubustack/bobrapet/api/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/refs" - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/bubustack/core/contracts" - "github.com/stretchr/testify/require" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - k8sruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -type identityStorageManager struct{} - -func (identityStorageManager) Hydrate(ctx context.Context, data any) (any, error) { - return data, nil -} - -func (identityStorageManager) Dehydrate(ctx context.Context, data any, stepRunID string) (any, error) { - return data, nil -} - -type schemaK8sClient struct { - client.Client - patched []runsv1alpha1.StepRunStatus -} - -type failingSchemaClient struct { - patched []runsv1alpha1.StepRunStatus - err error -} - -type deadlineRecordingSchemaClient struct { - client.Client - deadlinesSeen []bool -} - -type blockingSchemaClient struct{} -type templateForbiddenSchemaClient struct { - client.Client - err error -} - -func (s *schemaK8sClient) TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - return nil, nil -} - -func (s *schemaK8sClient) PatchStepRunStatus( - ctx context.Context, - stepRunName string, - patchData runsv1alpha1.StepRunStatus, -) error { - s.patched = append(s.patched, patchData) - return nil -} - -func (s *failingSchemaClient) TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - return nil, nil -} - -func (s *failingSchemaClient) PatchStepRunStatus( - ctx context.Context, - stepRunName string, - patchData runsv1alpha1.StepRunStatus, -) error { - s.patched = append(s.patched, patchData) - return nil -} - -func (s *failingSchemaClient) Get(ctx context.Context, key types.NamespacedName, - obj client.Object, opts ...client.GetOption) error { - return s.err -} - -func (s *deadlineRecordingSchemaClient) TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - return nil, nil -} - -func (s *deadlineRecordingSchemaClient) PatchStepRunStatus( - ctx context.Context, - stepRunName string, - patchData runsv1alpha1.StepRunStatus, -) error { - return nil -} - -func (s *deadlineRecordingSchemaClient) Get(ctx context.Context, key types.NamespacedName, obj client.Object, opts ...client.GetOption) error { //nolint:lll - _, ok := ctx.Deadline() - s.deadlinesSeen = append(s.deadlinesSeen, ok) - return s.Client.Get(ctx, key, obj, opts...) -} - -func (blockingSchemaClient) TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - return nil, nil -} - -func (blockingSchemaClient) PatchStepRunStatus( - ctx context.Context, - stepRunName string, - patchData runsv1alpha1.StepRunStatus, -) error { - return nil -} - -func (blockingSchemaClient) Get(ctx context.Context, key types.NamespacedName, obj client.Object, opts ...client.GetOption) error { //nolint:lll - <-ctx.Done() - return ctx.Err() -} - -func (s *templateForbiddenSchemaClient) TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - return nil, nil -} - -func (s *templateForbiddenSchemaClient) PatchStepRunStatus( - ctx context.Context, - stepRunName string, - patchData runsv1alpha1.StepRunStatus, -) error { - return nil -} - -func (s *templateForbiddenSchemaClient) Get(ctx context.Context, key types.NamespacedName, obj client.Object, opts ...client.GetOption) error { //nolint:lll - switch obj.(type) { - case *catalogv1alpha1.EngramTemplate: - return s.err - default: - return s.Client.Get(ctx, key, obj, opts...) - } -} - -func newSchemaClient(t *testing.T, objects ...client.Object) *schemaK8sClient { - t.Helper() - scheme := k8sruntime.NewScheme() - require.NoError(t, runsv1alpha1.AddToScheme(scheme)) - require.NoError(t, bubuv1alpha1.AddToScheme(scheme)) - require.NoError(t, catalogv1alpha1.AddToScheme(scheme)) - return &schemaK8sClient{ - Client: fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - Build(), - } -} - -func TestHydrateAndUnmarshalInputs_ValidatesSchema(t *testing.T) { - t.Setenv(contracts.EngramNameEnv, "engram") - t.Setenv(contracts.SkipInputTemplatingEnv, "true") - - inputSchema, err := json.Marshal(map[string]any{ - "type": "object", - "properties": map[string]any{ - "foo": map[string]any{"type": "string"}, - }, - "required": []any{"foo"}, - }) - require.NoError(t, err) - - template := &catalogv1alpha1.EngramTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "template", - }, - Spec: catalogv1alpha1.EngramTemplateSpec{ - InputSchema: &k8sruntime.RawExtension{Raw: inputSchema}, - }, - } - engramObj := &bubuv1alpha1.Engram{ - ObjectMeta: metav1.ObjectMeta{ - Name: "engram", - Namespace: "default", - }, - Spec: bubuv1alpha1.EngramSpec{ - TemplateRef: refs.EngramTemplateReference{Name: template.Name}, - }, - } - - k8sClient := newSchemaClient(t, template, engramObj) - - execCtx := &runtime.ExecutionContextData{ - Inputs: map[string]any{"foo": 123}, - StoryInfo: engram.StoryInfo{ - StepRunID: "step-run", - StepRunNamespace: "default", - }, - StartedAt: metav1.Now(), - } - - _, err = hydrateAndUnmarshalInputs[struct{}, map[string]any](context.Background(), identityStorageManager{}, k8sClient, execCtx) //nolint:lll - require.Error(t, err) - require.Len(t, k8sClient.patched, 1) - - status := k8sClient.patched[0] - require.Equal(t, enums.PhaseFailed, status.Phase) - serr := requireStructuredStatusError(t, &status) - require.Equal(t, runsv1alpha1.StructuredErrorTypeValidation, serr.Type) -} - -func TestHandleResultAndPatchStatus_ValidatesOutputSchema(t *testing.T) { - t.Setenv(contracts.EngramNameEnv, "engram") - - outputSchema, err := json.Marshal(map[string]any{ - "type": "object", - "properties": map[string]any{ - "foo": map[string]any{"type": "string"}, - }, - "required": []any{"foo"}, - }) - require.NoError(t, err) - - template := &catalogv1alpha1.EngramTemplate{ - ObjectMeta: metav1.ObjectMeta{ - Name: "template", - }, - Spec: catalogv1alpha1.EngramTemplateSpec{ - OutputSchema: &k8sruntime.RawExtension{Raw: outputSchema}, - }, - } - engramObj := &bubuv1alpha1.Engram{ - ObjectMeta: metav1.ObjectMeta{ - Name: "engram", - Namespace: "default", - }, - Spec: bubuv1alpha1.EngramSpec{ - TemplateRef: refs.EngramTemplateReference{Name: template.Name}, - }, - } - - k8sClient := newSchemaClient(t, template, engramObj) - - execCtx := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{ - StepRunID: "step-run", - StepRunNamespace: "default", - }, - StartedAt: metav1.Now(), - } - - result := engram.NewResultFrom(map[string]any{"foo": 123}) - succeeded, _, finalErr, patchErr := handleResultAndPatchStatus( - context.Background(), - identityStorageManager{}, - k8sClient, - execCtx, - result, - nil, - false, - nil, - ) - - require.False(t, succeeded) - require.Error(t, finalErr) - require.NoError(t, patchErr) - require.Len(t, k8sClient.patched, 1) - - status := k8sClient.patched[0] - require.Equal(t, enums.PhaseFailed, status.Phase) - serr := requireStructuredStatusError(t, &status) - require.Equal(t, runsv1alpha1.StructuredErrorTypeValidation, serr.Type) -} - -func TestHydrateAndUnmarshalInputs_FailsClosedWhenSchemaLookupErrors(t *testing.T) { - t.Setenv(contracts.EngramNameEnv, "engram") - t.Setenv(contracts.SkipInputTemplatingEnv, "true") - - k8sClient := &failingSchemaClient{err: fmt.Errorf("apiserver unavailable")} - execCtx := &runtime.ExecutionContextData{ - Inputs: map[string]any{"foo": "bar"}, - StoryInfo: engram.StoryInfo{ - StepRunID: "step-run", - StepRunNamespace: "default", - }, - StartedAt: metav1.Now(), - } - - _, err := hydrateAndUnmarshalInputs[struct{}, map[string]any](context.Background(), identityStorageManager{}, k8sClient, execCtx) //nolint:lll - require.Error(t, err) - require.Len(t, k8sClient.patched, 1) - - status := k8sClient.patched[0] - require.Equal(t, enums.PhaseFailed, status.Phase) - require.Equal(t, enums.ExitClassRetry, status.ExitClass) - serr := requireStructuredStatusError(t, &status) - require.Equal(t, runsv1alpha1.StructuredErrorTypeExecution, serr.Type) - require.Equal(t, runsv1alpha1.StructuredErrorExitClass(enums.ExitClassRetry), serr.ExitClass) -} - -func TestResolveInputSchema_AppliesBoundedDeadlineToLookupGets(t *testing.T) { - inputSchema, err := json.Marshal(map[string]any{ - "type": "object", - "properties": map[string]any{ - "foo": map[string]any{"type": "string"}, - }, - }) - require.NoError(t, err) - - template := &catalogv1alpha1.EngramTemplate{ - ObjectMeta: metav1.ObjectMeta{Name: "template"}, - Spec: catalogv1alpha1.EngramTemplateSpec{ - InputSchema: &k8sruntime.RawExtension{Raw: inputSchema}, - }, - } - engramObj := &bubuv1alpha1.Engram{ - ObjectMeta: metav1.ObjectMeta{ - Name: "engram", - Namespace: "default", - }, - Spec: bubuv1alpha1.EngramSpec{ - TemplateRef: refs.EngramTemplateReference{Name: template.Name}, - }, - } - stepRun := &runsv1alpha1.StepRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "step-run", - Namespace: "default", - }, - Spec: runsv1alpha1.StepRunSpec{ - EngramRef: &refs.EngramReference{ - ObjectReference: refs.ObjectReference{Name: "engram"}, - }, - }, - } - - baseClient := newSchemaClient(t, template, engramObj, stepRun) - recordingClient := &deadlineRecordingSchemaClient{Client: baseClient.Client} - execCtx := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{ - StepRunID: "step-run", - StepRunNamespace: "default", - }, - } - - schema, schemaName, ok, err := resolveInputSchema(context.Background(), recordingClient, execCtx, nil) //nolint:revive - require.NoError(t, err) - require.True(t, ok) - require.NotNil(t, schema) - require.Equal(t, "EngramTemplate template inputs", schemaName) - require.Len(t, recordingClient.deadlinesSeen, 3) - for i, sawDeadline := range recordingClient.deadlinesSeen { - require.Truef(t, sawDeadline, "expected lookup %d to use a bounded context", i+1) - } -} - -func TestFetchEngramTemplate_BlocksOnlyUntilSchemaLookupTimeout(t *testing.T) { - t.Setenv(contracts.EngramNameEnv, "engram") - t.Setenv(contracts.K8sOperationTimeoutEnv, "10ms") - - execCtx := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{ - StepRunNamespace: "default", - }, - } - - type result struct { - template *catalogv1alpha1.EngramTemplate - ok bool - err error - } - done := make(chan result, 1) - go func() { - template, ok, err := fetchEngramTemplate(context.Background(), blockingSchemaClient{}, execCtx, nil) - done <- result{template: template, ok: ok, err: err} - }() - - select { - case res := <-done: - require.Nil(t, res.template) - require.False(t, res.ok) - require.Error(t, res.err) - require.True(t, errors.Is(res.err, context.DeadlineExceeded), "expected deadline exceeded, got %v", res.err) - case <-time.After(250 * time.Millisecond): - t.Fatal("schema lookup did not honor bounded timeout") - } -} - -func TestFetchEngramTemplate_SkipsForbiddenEngramLookup(t *testing.T) { - t.Setenv(contracts.EngramNameEnv, "engram") - - execCtx := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{ - StepRunNamespace: "default", - }, - } - k8sClient := &failingSchemaClient{ - err: apierrors.NewForbidden( - schema.GroupResource{Group: "bubustack.io", Resource: "engrams"}, - "engram", - errors.New("denied"), - ), - } - - template, ok, err := fetchEngramTemplate(context.Background(), k8sClient, execCtx, nil) - require.Nil(t, template) - require.False(t, ok) - require.NoError(t, err) -} - -func TestFetchEngramTemplate_SkipsForbiddenTemplateLookup(t *testing.T) { - t.Setenv(contracts.EngramNameEnv, "engram") - - execCtx := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{ - StepRunNamespace: "default", - }, - } - template := &catalogv1alpha1.EngramTemplate{ - ObjectMeta: metav1.ObjectMeta{Name: "template"}, - } - engramObj := &bubuv1alpha1.Engram{ - ObjectMeta: metav1.ObjectMeta{Name: "engram", Namespace: "default"}, - Spec: bubuv1alpha1.EngramSpec{ - TemplateRef: refs.EngramTemplateReference{Name: template.Name}, - }, - } - - base := newSchemaClient(t, engramObj, template) - k8sClient := &templateForbiddenSchemaClient{ - Client: base.Client, - err: apierrors.NewForbidden( - schema.GroupResource{Group: "catalog.bubustack.io", Resource: "engramtemplates"}, - "template", - errors.New("denied"), - ), - } - - resolved, ok, err := fetchEngramTemplate(context.Background(), k8sClient, execCtx, nil) - require.Nil(t, resolved) - require.False(t, ok) - require.NoError(t, err) -} - -func TestFetchEngramTemplate_IgnoresLegacySkipEnvAndStillFailsClosed(t *testing.T) { - t.Setenv("BUBU_SCHEMA_ALLOW_LOOKUP_SKIP_ON_ERROR", "true") - - execCtx := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{ - StepRunID: "step-run", - StepRunNamespace: "default", - }, - } - k8sClient := &failingSchemaClient{ - err: apierrors.NewNotFound( - schema.GroupResource{Group: "bubustack.io", Resource: "stepruns"}, - "step-run", - ), - } - - template, ok, err := fetchEngramTemplate(context.Background(), k8sClient, execCtx, nil) - require.Nil(t, template) - require.False(t, ok) - require.Error(t, err) - require.ErrorContains(t, err, "resolve engram name") -} - -func TestValidateJSONAgainstSchemaRejectsExternalRefs(t *testing.T) { - err := validateJSONAgainstSchema( - []byte(`{"foo":"bar"}`), - []byte(`{"$ref":"https://example.com/schema.json"}`), - "test", - ) - require.ErrorContains(t, err, "external schema reference") -} - -func TestValidateJSONAgainstSchema_RejectsImplicitRuntimeAlternativesByDefault(t *testing.T) { - schemaBytes, err := json.Marshal(map[string]any{ - "type": "object", - "properties": map[string]any{ - "secret": map[string]any{"type": "string"}, - "color": map[string]any{ - "type": "string", - "enum": []any{"red", "blue"}, - }, - }, - "required": []any{"secret", "color"}, - }) - require.NoError(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"secret":{"$bubuSecretRef":"ns/name:key"},"color":"red"}`), - schemaBytes, - "test", - ) - require.Error(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"secret":"plain","color":"{{ .inputs.color }}"}`), - schemaBytes, - "test", - ) - require.Error(t, err) -} - -func TestValidateJSONAgainstSchema_AllowsRuntimeAlternativesWhenSchemaOptsIn(t *testing.T) { - schemaBytes, err := json.Marshal(map[string]any{ - "type": "object", - schemaAllowRuntimeRefsKeyword: true, - schemaAllowTemplateStringsKeyword: true, - "properties": map[string]any{ - "secret": map[string]any{"type": "string"}, - "color": map[string]any{ - "type": "string", - "enum": []any{"red", "blue"}, - }, - }, - "required": []any{"secret", "color"}, - }) - require.NoError(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"secret":{"$bubuSecretRef":"ns/name:key"},"color":"{{ .inputs.color }}"}`), - schemaBytes, - "test", - ) - require.NoError(t, err) -} - -func TestValidateJSONAgainstSchema_AddsTemplateAlternativeWhenOneOfHasNonTemplatePattern(t *testing.T) { - schemaBytes, err := json.Marshal(map[string]any{ - "type": "object", - schemaAllowTemplateStringsKeyword: true, - "properties": map[string]any{ - "color": map[string]any{ - "oneOf": []any{ - map[string]any{ - "type": "string", - "pattern": "^[a-z]+$", - }, - map[string]any{ - "type": "integer", - }, - }, - }, - }, - "required": []any{"color"}, - }) - require.NoError(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"color":"{{ .inputs.color }}"}`), - schemaBytes, - "test", - ) - require.NoError(t, err) -} - -func TestValidateJSONAgainstSchema_FieldOptInDoesNotBroadenSiblingFields(t *testing.T) { - schemaBytes, err := json.Marshal(map[string]any{ - "type": "object", - "properties": map[string]any{ - "allowedSecret": map[string]any{ - "type": "string", - schemaAllowRuntimeRefsKeyword: true, - }, - "allowedColor": map[string]any{ - "type": "string", - "enum": []any{"red", "blue"}, - schemaAllowTemplateStringsKeyword: true, - }, - "literalSecret": map[string]any{"type": "string"}, - "literalColor": map[string]any{ - "type": "string", - "enum": []any{"red", "blue"}, - }, - }, - "required": []any{"allowedSecret", "allowedColor", "literalSecret", "literalColor"}, - }) - require.NoError(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"allowedSecret":{"$bubuSecretRef":"ns/name:key"},"allowedColor":"{{ .inputs.color }}","literalSecret":"plain","literalColor":"red"}`), //nolint:lll - schemaBytes, - "test", - ) - require.NoError(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"allowedSecret":"plain","allowedColor":"red","literalSecret":{"$bubuSecretRef":"ns/name:key"},"literalColor":"{{ .inputs.color }}"}`), //nolint:lll - schemaBytes, - "test", - ) - require.Error(t, err) -} - -func TestValidateJSONAgainstSchema_IgnoresLegacyImplicitAlternativesEnv(t *testing.T) { - t.Setenv("BUBU_SCHEMA_ALLOW_IMPLICIT_RUNTIME_ALTERNATIVES", "true") - - schemaBytes, err := json.Marshal(map[string]any{ - "type": "object", - "properties": map[string]any{ - "secret": map[string]any{"type": "string"}, - "color": map[string]any{ - "type": "string", - "enum": []any{"red", "blue"}, - }, - }, - "required": []any{"secret", "color"}, - }) - require.NoError(t, err) - - err = validateJSONAgainstSchema( - []byte(`{"secret":{"$bubuSecretRef":"ns/name:key"},"color":"{{ .inputs.color }}"}`), - schemaBytes, - "test", - ) - require.Error(t, err) -} diff --git a/sdk.go b/sdk.go index 12e7c32..54f67c9 100644 --- a/sdk.go +++ b/sdk.go @@ -1,19 +1,3 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - // Package sdk provides the primary entry points for executing bobrapet components. // // This package contains the runtime logic that bootstraps an Engram or Impulse, @@ -45,7 +29,7 @@ limitations under the License. // // All entry points respect context cancellation. Batch engrams enforce a timeout // via BUBU_STEP_TIMEOUT. Streaming engrams implement graceful shutdown on SIGTERM -// with configurable drain timeouts via BUBU_SDK_GRACEFUL_SHUTDOWN_TIMEOUT. +// with configurable drain timeouts via BUBU_GRPC_GRACEFUL_SHUTDOWN_TIMEOUT. // // # Error Handling // @@ -56,23 +40,18 @@ package sdk import ( "context" - "errors" "fmt" "os" - "strings" "log/slog" runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" "github.com/bubustack/bubu-sdk-go/engram" "github.com/bubustack/bubu-sdk-go/k8s" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" - "github.com/bubustack/core/contracts" ) // ==== Logger injection via context ==== type ctxLoggerKey struct{} -type ctxLogCaptureKey struct{} // WithLogger stores a slog.Logger in the context for SDK use. // @@ -105,80 +84,12 @@ func WithLogger(ctx context.Context, logger *slog.Logger) context.Context { // Thread-safe and idempotent. func LoggerFromContext(ctx context.Context) *slog.Logger { if ctx == nil { - return newDefaultLogger() + return slog.New(slog.NewJSONHandler(os.Stdout, nil)) } if l, ok := ctx.Value(ctxLoggerKey{}).(*slog.Logger); ok && l != nil { return l } - return newDefaultLogger() -} - -func panicAsError(component string, recovered any) error { - label := strings.TrimSpace(component) - if label == "" { - label = "sdk component" - } - switch typed := recovered.(type) { - case error: - return fmt.Errorf("%s panicked: %w", label, typed) - case string: - return fmt.Errorf("%s panicked: %s", label, typed) - default: - return fmt.Errorf("%s panicked: %v", label, recovered) - } -} - -func callWithPanicRecovery[T any](component string, fn func() (T, error)) (value T, err error) { - defer func() { - if recovered := recover(); recovered != nil { - err = panicAsError(component, recovered) - } - }() - return fn() -} - -func callWithPanicRecoveryNoValue(component string, fn func() error) error { - _, err := callWithPanicRecovery(component, func() (struct{}, error) { - return struct{}{}, fn() - }) - return err -} - -func withDefaultLogger(ctx context.Context) (context.Context, *logCapture) { //nolint:unparam - if ctx == nil { - ctx = context.Background() - } - if l, ok := ctx.Value(ctxLoggerKey{}).(*slog.Logger); ok && l != nil { - return ctx, logCaptureFromContext(ctx) - } - logger, capture := newDefaultLoggerWithCapture() - ctx = context.WithValue(ctx, ctxLoggerKey{}, logger) - if capture != nil { - ctx = context.WithValue(ctx, ctxLogCaptureKey{}, capture) - } - return ctx, capture -} - -func logCaptureFromContext(ctx context.Context) *logCapture { - if ctx == nil { - return nil - } - if c, ok := ctx.Value(ctxLogCaptureKey{}).(*logCapture); ok && c != nil { - return c - } - return nil -} - -// WithTriggerToken attaches an idempotency token that StartStory passes through to the Kubernetes client. -// When provided, the SDK derives deterministic StoryRun names without relying on process-wide env vars. -// Nil contexts are accepted so callers can safely attach a token before choosing a base context. -func WithTriggerToken(ctx context.Context, token string) context.Context { - return k8s.WithTriggerToken(ctx, token) -} - -// TriggerTokenFromContext returns the trigger token stored in the context, if any. -func TriggerTokenFromContext(ctx context.Context) string { - return k8s.TriggerTokenFromContext(ctx) + return slog.New(slog.NewJSONHandler(os.Stdout, nil)) } // K8sClient defines the interface for Kubernetes operations required by the SDK. @@ -199,12 +110,7 @@ type K8sClient interface { // The inputs map is marshaled to JSON and stored in the StoryRun's spec.inputs field. // Returns the created StoryRun on success, or an error if creation fails. // Respects context cancellation and deadlines. - TriggerStory( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - ) (*runsv1alpha1.StoryRun, error) + TriggerStory(ctx context.Context, storyName string, inputs map[string]any) (*runsv1alpha1.StoryRun, error) // PatchStepRunStatus updates the status of the named StepRun with the provided patch data. // The implementation should use field-wise merging to avoid clobbering controller-managed @@ -213,35 +119,6 @@ type K8sClient interface { PatchStepRunStatus(ctx context.Context, stepRunName string, patchData runsv1alpha1.StepRunStatus) error } -// DualEngram is implemented by Engram types that support both batch and streaming modes. -type DualEngram[C any, I any] interface { - engram.BatchEngram[C, I] - engram.StreamingEngram[C] -} - -const defaultExecutionMode = "batch" - -// ExecutionMode returns the execution mode supplied by the operator (e.g. "batch", "job", "deployment"). -// Defaults to "batch" when not set so local runs behave sensibly. -func ExecutionMode() string { - mode := strings.TrimSpace(os.Getenv(contracts.ExecutionModeEnv)) - if mode == "" { - return defaultExecutionMode - } - return strings.ToLower(mode) -} - -// Start launches the provided engram in either batch or streaming mode based on ExecutionMode. -// This lets engram entrypoints avoid direct environment inspection and stick to the SDK abstraction. -func Start[C any, I any](ctx context.Context, e DualEngram[C, I]) error { - switch ExecutionMode() { - case "", "job", "batch": - return StartBatch(ctx, e) - default: - return StartStreaming(ctx, e) - } -} - // StorageManager defines the interface for storage operations required by the SDK. // // This interface provides transparent data offloading for large inputs and outputs, @@ -255,15 +132,7 @@ func Start[C any, I any](ctx context.Context, e DualEngram[C, I]) error { // - Path traversal protection and validation // - OpenTelemetry metrics for operation latency and data sizes // -// Storage references use one of the formats below: -// - {"$bubuStorageRef": "outputs/steprun-id/path.json"} -// - {"$bubuStorageRef": "outputs/steprun-id/path.json", "$bubuStoragePath":"result.text"} -// - {"$bubuConfigMapRef": "namespace/name:key"} -// - {"$bubuSecretRef": "namespace/name:key"} -// - {"$bubuConfigMapRef": {"name":"cfg","key":"payload","namespace":"ns","format":"json"}} -// -// Supported formats for ConfigMap/Secret refs: auto (default), json, raw. -// When namespace is omitted, the SDK defaults to BUBU_POD_NAMESPACE/BUBU_STEPRUN_NAMESPACE. +// Storage references use the format {"$bubuStorageRef": "outputs/steprun-id/path.json"}. type StorageManager interface { // Hydrate recursively scans a data structure for storage references and replaces // them with the actual content from the storage backend. Returns the hydrated @@ -279,61 +148,9 @@ type StorageManager interface { Dehydrate(ctx context.Context, data any, stepRunID string) (any, error) } -// === Target Story Resolution === - -// TargetStory holds the target story information resolved from the Impulse's storyRef. -// This is set by the operator via environment variables. -type TargetStory struct { - // Name is the Story name from Impulse.spec.storyRef.name. - Name string - - // Namespace is the Story namespace from Impulse.spec.storyRef.namespace. - // Empty if the Story is in the same namespace as the Impulse. - Namespace string -} - -// GetTargetStory returns the target story configured via the Impulse's spec.storyRef. -// The operator injects these values as BUBU_TARGET_STORY_NAME and BUBU_TARGET_STORY_NAMESPACE -// environment variables when running an Impulse pod. -// -// Returns an error if BUBU_TARGET_STORY_NAME is not set, as the Impulse CRD requires -// a storyRef to be specified. -// -// Example: -// -// target, err := sdk.GetTargetStory() -// if err != nil { -// return fmt.Errorf("no target story configured: %w", err) -// } -// sr, err := sdk.StartStoryInNamespace(ctx, target.Name, target.Namespace, inputs) -func GetTargetStory() (TargetStory, error) { - name := strings.TrimSpace(os.Getenv(contracts.TargetStoryNameEnv)) - if name == "" { - return TargetStory{}, fmt.Errorf( - "target story not configured: %s environment variable is not set", - contracts.TargetStoryNameEnv, - ) - } - return TargetStory{ - Name: name, - Namespace: strings.TrimSpace(os.Getenv(contracts.TargetStoryNamespaceEnv)), - }, nil -} - -// MustGetTargetStory is like GetTargetStory but panics if the target story is not configured. -// Useful in main() where early failure is preferred over error handling. -func MustGetTargetStory() TargetStory { - target, err := GetTargetStory() - if err != nil { - panic(err) - } - return target -} - // === Story Helpers === // StartStory triggers a new StoryRun for the named Story with the provided inputs. -// For cross-namespace executions, use StartStoryInNamespace to explicitly set the Story namespace. // // This is the primary mechanism for programmatically initiating workflows, typically // used from within an Impulse. The SDK automatically resolves the correct namespace @@ -357,88 +174,18 @@ func MustGetTargetStory() TargetStory { // } // log.Printf("Triggered StoryRun: %s", sr.Name) func StartStory(ctx context.Context, storyName string, inputs map[string]any) (*runsv1alpha1.StoryRun, error) { - return StartStoryInNamespace(ctx, storyName, "", inputs) -} - -// StartStoryInNamespace is identical to StartStory but allows specifying the namespace of the -// referenced Story explicitly. Provide an empty namespace to use the default resolution -// (BUBU_TARGET_STORY_NAMESPACE or the pod namespace). -func StartStoryInNamespace( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - if ctx == nil { - return nil, fmt.Errorf("context must not be nil") - } - k8sClient, err := k8s.SharedClient() + k8sClient, err := k8s.NewClient() if err != nil { return nil, fmt.Errorf("failed to create k8s client: %w", err) } - return k8sClient.TriggerStory(ctx, storyName, storyNamespace, inputs) -} - -// StartStoryWithToken behaves like StartStory but accepts a per-call token for deterministic retries. -func StartStoryWithToken( - ctx context.Context, - storyName string, - token string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - if ctx == nil { - return nil, fmt.Errorf("context must not be nil") - } - return StartStory(WithTriggerToken(ctx, token), storyName, inputs) -} - -// StartStoryWithTokenInNamespace behaves like StartStoryInNamespace but accepts a per-call token -// for deterministic retries. -func StartStoryWithTokenInNamespace( - ctx context.Context, - storyName string, - storyNamespace string, - token string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - if ctx == nil { - return nil, fmt.Errorf("context must not be nil") - } - return StartStoryInNamespace(WithTriggerToken(ctx, token), storyName, storyNamespace, inputs) -} - -// StopStory cancels an in-flight StoryRun in the current namespace. -// Equivalent to StopStoryInNamespace with an empty namespace. -func StopStory(ctx context.Context, storyRunName string) error { - return StopStoryInNamespace(ctx, storyRunName, "") -} - -// StopStoryInNamespace cancels an in-flight StoryRun by marking it finished. -// If the StoryRun does not exist, ErrStoryRunNotFound is returned. Already-terminal -// StoryRuns are treated as a no-op. Active StoryRuns in phases the SDK will not -// force-finish return an invalid-transition error from the underlying k8s client. -func StopStoryInNamespace(ctx context.Context, storyRunName, namespace string) error { - if ctx == nil { - return fmt.Errorf("context must not be nil") - } - k8sClient, err := k8s.SharedClient() - if err != nil { - return fmt.Errorf("failed to create k8s client: %w", err) - } - if err := k8sClient.StopStoryRun(ctx, storyRunName, namespace); err != nil { - if errors.Is(err, sdkerrors.ErrNotFound) { - return fmt.Errorf("storyrun %q not found: %w", storyRunName, ErrStoryRunNotFound) - } - return err - } - return nil + return k8sClient.TriggerStory(ctx, storyName, inputs) } // StartBatch is the type-safe entry point for batch engrams (Kubernetes Jobs). // // This function infers both config type C and input type I from the engram implementation, // providing full compile-time type safety. It orchestrates the complete lifecycle: -// 1. Load execution context from environment (BUBU_STEP_CONFIG, BUBU_TRIGGER_DATA, etc.) +// 1. Load execution context from environment (BUBU_CONFIG, BUBU_INPUTS, etc.) // 2. Unmarshal config and inputs into types C and I // 3. Call engram.Init with typed config and secrets // 4. Hydrate inputs from storage if needed @@ -469,7 +216,7 @@ func StartBatch[C any, I any](ctx context.Context, e engram.BatchEngram[C, I]) e // // This function infers config type C from the engram implementation, providing compile-time // type safety. It orchestrates the complete lifecycle: -// 1. Load execution context from environment (BUBU_STEP_CONFIG, etc.) +// 1. Load execution context from environment (BUBU_CONFIG, etc.) // 2. Unmarshal config into type C // 3. Call engram.Init with typed config and secrets // 4. Start gRPC server on BUBU_GRPC_PORT (default 50051) diff --git a/sdk_test.go b/sdk_test.go index b803f3b..78409e0 100644 --- a/sdk_test.go +++ b/sdk_test.go @@ -1,40 +1,19 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - package sdk import ( "context" "errors" "os" - "path/filepath" - "strings" "testing" - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bobrapet/pkg/storage" "github.com/bubustack/bubu-sdk-go/engram" "github.com/bubustack/bubu-sdk-go/k8s" "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/bubustack/core/contracts" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/bubustack/bubu-sdk-go/storage" ) // Mocks @@ -57,125 +36,24 @@ func (m *MockBatchEngram[C, I]) Process( return nil, args.Error(1) } -type timeoutEngram struct{} - -func TestWithTriggerToken_AllowsNilContextAndStoresToken(t *testing.T) { - ctx := WithTriggerToken(nil, "token-123") //nolint:staticcheck - if ctx == nil { - t.Fatal("expected context when attaching token to nil context") - } - if got := TriggerTokenFromContext(ctx); got != "token-123" { - t.Fatalf("TriggerTokenFromContext() = %q, want %q", got, "token-123") - } -} - -func TestWithTriggerToken_EmptyTokenPreservesNilContext(t *testing.T) { - if got := WithTriggerToken(nil, ""); got != nil { //nolint:staticcheck - t.Fatalf("expected nil context passthrough for empty token, got %#v", got) - } -} - -func (timeoutEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - return nil -} - -func (timeoutEngram) Process( - ctx context.Context, - execCtx *engram.ExecutionContext, - inputs struct{}, -) (*engram.Result, error) { - <-ctx.Done() - return nil, ctx.Err() -} - -type timeoutSuccessEngram struct{} - -func (timeoutSuccessEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - return nil -} - -func (timeoutSuccessEngram) Process( - ctx context.Context, - execCtx *engram.ExecutionContext, - inputs struct{}, -) (*engram.Result, error) { - <-ctx.Done() - return &engram.Result{Data: "late-success"}, nil -} - -type panicProcessEngram struct{} - -func (panicProcessEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - return nil -} - -func (panicProcessEngram) Process( - ctx context.Context, - execCtx *engram.ExecutionContext, - inputs struct{}, -) (*engram.Result, error) { - panic(errors.New("process boom")) -} - -type secretExpansionInitTrackingEngram struct { - initCalled bool -} - -func (e *secretExpansionInitTrackingEngram) Init( - ctx context.Context, - config struct{}, - secrets *engram.Secrets, -) error { - e.initCalled = true - return nil -} - -func (e *secretExpansionInitTrackingEngram) Process( - ctx context.Context, - execCtx *engram.ExecutionContext, - inputs struct{}, -) (*engram.Result, error) { - return engram.NewResultFrom("ok"), nil -} - -func TestCallWithPanicRecoveryNoValue_ConvertsPanicsToErrors(t *testing.T) { - cause := errors.New("boom") - err := callWithPanicRecoveryNoValue("test component", func() error { - panic(cause) - }) - assert.Error(t, err) - assert.Contains(t, err.Error(), "test component panicked") - assert.True(t, errors.Is(err, cause)) -} - func TestRun_Success_NoBridge(t *testing.T) { // Setup environment - err := os.Setenv(contracts.StoryNameEnv, "test-story") - if err != nil { - t.Fatalf("Setenv() error = %v", err) - } - err = os.Setenv(contracts.StepRunNameEnv, "test-step-run") + err := os.Setenv("BUBU_STORY_NAME", "test-story") if err != nil { t.Fatalf("Setenv() error = %v", err) } - err = os.Setenv(contracts.StepRunNamespaceEnv, "default") + err = os.Setenv("BUBU_STEPRUN_NAME", "test-step-run") if err != nil { t.Fatalf("Setenv() error = %v", err) } defer func() { - err = os.Unsetenv(contracts.StoryNameEnv) - if err != nil { - t.Fatalf("Unsetenv() error = %v", err) - } - }() - defer func() { - err = os.Unsetenv(contracts.StepRunNameEnv) + err = os.Unsetenv("BUBU_STORY_NAME") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } }() defer func() { - err = os.Unsetenv(contracts.StepRunNamespaceEnv) + err = os.Unsetenv("BUBU_STEPRUN_NAME") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } @@ -190,16 +68,16 @@ func TestRun_Success_NoBridge(t *testing.T) { mockEngram.On("Init", mock.Anything, mock.Anything, mock.Anything).Return(nil) mockEngram.On("Process", mock.Anything, mock.Anything, mock.Anything).Return(&engram.Result{Data: "success"}, nil) mockSM.On("Hydrate", mock.Anything, mock.Anything).Return(map[string]any{}, nil) - mockSM.On("Dehydrate", mock.Anything, "success", storage.NamespacedKey("default", "test-step-run")).Return("dehydrated", nil) //nolint:lll + mockSM.On("Dehydrate", mock.Anything, "success", "test-step-run").Return("dehydrated", nil) mockK8s.On("PatchStepRunStatus", mock.Anything, "test-step-run", mock.Anything).Return(nil) // Disable bridge - err = os.Setenv(contracts.HybridBridgeEnv, "false") + err = os.Setenv("BUBU_HYBRID_BRIDGE", "false") if err != nil { t.Fatalf("Setenv() error = %v", err) } defer func() { - err = os.Unsetenv(contracts.HybridBridgeEnv) + err = os.Unsetenv("BUBU_HYBRID_BRIDGE") if err != nil { t.Fatalf("Unsetenv() error = %v", err) } @@ -215,192 +93,55 @@ func TestRun_Success_NoBridge(t *testing.T) { mockK8s.AssertExpectations(t) } -func TestRunWithClients_InitFailurePatchesStatus(t *testing.T) { - t.Setenv(contracts.StoryNameEnv, "init-failure-story") - t.Setenv(contracts.StepRunNameEnv, "step-init-fail") - - initErr := errors.New("init boom") - - mockEngram := &MockBatchEngram[map[string]any, any]{} - mockEngram.On("Init", mock.Anything, mock.Anything, mock.Anything).Return(initErr) - - mockSM := &storage.MockManager{} - mockK8s := &k8s.MockClient{} - mockK8s.On("PatchStepRunStatus", mock.Anything, "step-init-fail", - mock.MatchedBy(func(status runsv1alpha1.StepRunStatus) bool { - return status.Phase == enums.PhaseFailed && - status.ExitCode == 1 && - status.ExitClass == enums.ExitClassTerminal && - strings.Contains(status.LastFailureMsg, initErr.Error()) - }), - ).Return(nil) - - err := runWithClients(context.Background(), mockEngram, mockK8s, mockSM) - assert.ErrorIs(t, err, initErr) - - mockEngram.AssertExpectations(t) - mockK8s.AssertExpectations(t) - mockSM.AssertNotCalled(t, "Hydrate", mock.Anything, mock.Anything) - mockSM.AssertNotCalled(t, "Dehydrate", mock.Anything, mock.Anything, mock.Anything) -} - -func TestRunWithClientsTimeoutForcesExitCode(t *testing.T) { - t.Setenv(contracts.StepTimeoutEnv, "1ms") - - execCtxData := &runtime.ExecutionContextData{ - Inputs: map[string]any{}, - Config: map[string]any{}, - Secrets: map[string]string{}, - StoryInfo: engram.StoryInfo{ - StoryName: "timeout-story", - StoryRunID: "story-run", - StepName: "step", - StepRunID: "step-run", - StepRunNamespace: "default", - }, - StartedAt: metav1.Now(), - } - - mockSM := &storage.MockManager{} - mockSM.On("Hydrate", mock.Anything, mock.Anything).Return(map[string]any{}, nil) - - mockK8s := &k8s.MockClient{} - mockK8s.On("PatchStepRunStatus", mock.Anything, "step-run", - mock.MatchedBy(func(status runsv1alpha1.StepRunStatus) bool { - return status.ExitCode == 124 && - status.ExitClass == enums.ExitClassRetry && - status.Phase == enums.PhaseTimeout - }), - ).Return(nil) - - originalExit := exitProcess - defer func() { exitProcess = originalExit }() - - var ( - exitCalled bool - exitCode int - ) - exitProcess = func(code int) { - exitCalled = true - exitCode = code +func TestBridgeToHub_Enabled_TimesOutGracefully(t *testing.T) { + // Enable bridge, set tiny timeout, and set a bogus hub to force dial timeout + err := os.Setenv("BUBU_HYBRID_BRIDGE", "true") + if err != nil { + t.Fatalf("Setenv() error = %v", err) } - - err := runWithClientsWithContext(context.Background(), timeoutEngram{}, mockK8s, mockSM, execCtxData) - if err == nil { - t.Fatalf("expected timeout error") + err = os.Setenv("BUBU_HYBRID_BRIDGE_TIMEOUT", "100ms") + if err != nil { + t.Fatalf("Setenv() error = %v", err) } - assert.True(t, errors.Is(err, ErrBatchTimeout)) - assert.True(t, exitCalled, "expected exitProcess to be invoked") - assert.Equal(t, 124, exitCode) - - mockSM.AssertExpectations(t) - mockK8s.AssertExpectations(t) -} - -func TestRunWithClientsTimeoutSuccessStillForcesExitCode(t *testing.T) { - t.Setenv(contracts.StepTimeoutEnv, "1ms") - - execCtxData := &runtime.ExecutionContextData{ - Inputs: map[string]any{}, - Config: map[string]any{}, - Secrets: map[string]string{}, - StoryInfo: engram.StoryInfo{ - StoryName: "timeout-story", - StoryRunID: "story-run", - StepName: "step", - StepRunID: "step-run", - StepRunNamespace: "default", - }, - StartedAt: metav1.Now(), + err = os.Setenv("DOWNSTREAM_HOST", "10.255.255.1:65535") // unroutable + if err != nil { + t.Fatalf("Setenv() error = %v", err) } - - mockK8s := &k8s.MockClient{} - mockK8s.On("PatchStepRunStatus", mock.Anything, "step-run", - mock.MatchedBy(func(status runsv1alpha1.StepRunStatus) bool { - return status.ExitCode == 124 && - status.ExitClass == enums.ExitClassRetry && - status.Phase == enums.PhaseTimeout - }), - ).Return(nil) - - originalExit := exitProcess - defer func() { exitProcess = originalExit }() - - var ( - exitCalled bool - exitCode int - ) - exitProcess = func(code int) { - exitCalled = true - exitCode = code + err = os.Setenv("DOWNSTREAM_HOST", "10.255.255.1:65535") // unroutable + if err != nil { + t.Fatalf("Setenv() error = %v", err) } + defer func() { + err = os.Unsetenv("BUBU_HYBRID_BRIDGE") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + }() + defer func() { + err = os.Unsetenv("BUBU_HYBRID_BRIDGE_TIMEOUT") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + }() + defer func() { + err = os.Unsetenv("DOWNSTREAM_HOST") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + }() - err := runWithClientsWithContext(context.Background(), timeoutSuccessEngram{}, mockK8s, noopStorageManager{}, execCtxData) //nolint:lll + ctx := context.Background() + // Should return an error within timeout + err = bridgeToHub(ctx, []byte(`{"ok":true}`)) if err == nil { - t.Fatalf("expected timeout error") - } - assert.True(t, errors.Is(err, ErrBatchTimeout)) - assert.True(t, exitCalled, "expected exitProcess to be invoked") - assert.Equal(t, 124, exitCode) - - mockK8s.AssertExpectations(t) -} - -func TestRunWithClients_ProcessPanicPatchesStatus(t *testing.T) { - execCtxData := &runtime.ExecutionContextData{ - Inputs: map[string]any{}, - Config: map[string]any{}, - Secrets: map[string]string{}, - StoryInfo: engram.StoryInfo{ - StoryName: "panic-story", - StoryRunID: "story-run", - StepName: "step", - StepRunID: "step-run", - StepRunNamespace: "default", - }, - StartedAt: metav1.Now(), + t.Log("bridgeToHub unexpectedly succeeded against an unroutable address; environment may allow fast failures") } - - mockK8s := &k8s.MockClient{} - mockK8s.On("PatchStepRunStatus", mock.Anything, "step-run", - mock.MatchedBy(func(status runsv1alpha1.StepRunStatus) bool { - return status.Phase == enums.PhaseFailed && - status.ExitCode == 1 && - strings.Contains(status.LastFailureMsg, "panicked") - }), - ).Return(nil) - - err := runWithClientsWithContext(context.Background(), panicProcessEngram{}, mockK8s, noopStorageManager{}, execCtxData) //nolint:lll - assert.Error(t, err) - assert.Contains(t, err.Error(), "panicked") - - mockK8s.AssertExpectations(t) -} - -func TestInitializeEngramFailsOnSecretExpansionError(t *testing.T) { - missingDir := filepath.Join(t.TempDir(), "missing") - eng := &secretExpansionInitTrackingEngram{} - - err := initializeEngram[struct{}, struct{}]( - context.Background(), - eng, - noopStorageManager{}, - &runtime.ExecutionContextData{ - Config: map[string]any{}, - Secrets: map[string]string{"db": "file:" + missingDir}, - }, - ) - assert.Error(t, err) - assert.Contains(t, err.Error(), "failed to expand secrets") - assert.Contains(t, err.Error(), `secret "db" (file)`) - assert.NotContains(t, err.Error(), missingDir) - assert.False(t, eng.initCalled, "Init should not run when secret expansion fails") } func TestHandleResultAndPatchStatus(t *testing.T) { ctx := context.Background() execCtxData := &runtime.ExecutionContextData{ - StoryInfo: engram.StoryInfo{StepRunID: "step-1", StepRunNamespace: "default"}, + StoryInfo: engram.StoryInfo{StepRunID: "step-1"}, } processErr := errors.New("process error") patchErr := errors.New("patch error") @@ -463,18 +204,13 @@ func TestHandleResultAndPatchStatus(t *testing.T) { mockK8s := new(k8s.MockClient) if tt.expectDehydrate { - mockSM.On( - "Dehydrate", - mock.Anything, - mock.Anything, - storage.NamespacedKey(execCtxData.StoryInfo.StepRunNamespace, execCtxData.StoryInfo.StepRunID), - ).Return(mock.Anything, tt.dehydrateErr) + mockSM.On("Dehydrate", ctx, mock.Anything, execCtxData.StoryInfo.StepRunID).Return(mock.Anything, tt.dehydrateErr) } if tt.expectPatch { mockK8s.On("PatchStepRunStatus", ctx, execCtxData.StoryInfo.StepRunID, mock.Anything).Return(tt.patchErr) } - _, _, err, _ := handleResultAndPatchStatus(ctx, mockSM, mockK8s, execCtxData, tt.result, tt.processErr, false, nil) + _, _, err := handleResultAndPatchStatus(ctx, mockSM, mockK8s, execCtxData, tt.result, tt.processErr, false) if tt.wantErr { assert.Error(t, err) diff --git a/signal_payloads.go b/signal_payloads.go deleted file mode 100644 index cbd2b53..0000000 --- a/signal_payloads.go +++ /dev/null @@ -1,128 +0,0 @@ -package sdk - -import ( - "context" - "crypto/sha256" - "fmt" - "maps" - "strings" - "unicode/utf8" -) - -const ( - defaultSignalSampleBytes = 2048 -) - -// SignalMeta describes a lightweight summary of a payload that is safe for signals. -type SignalMeta struct { - // Format identifies the logical payload shape, such as "text" or "json". - Format string `json:"format,omitempty"` - // ContentType carries the MIME content type when one is known. - ContentType string `json:"contentType,omitempty"` - // SizeBytes reports the original payload size in bytes. - SizeBytes int `json:"sizeBytes,omitempty"` - // HashSHA256 optionally carries the lowercase hex SHA-256 digest of the original payload. - HashSHA256 string `json:"hashSha256,omitempty"` - // Attributes carries additional safe, non-secret metadata about the payload. - Attributes map[string]string `json:"attributes,omitempty"` -} - -// SignalEnvelope is the standard signal structure for metadata + optional samples. -type SignalEnvelope struct { - // Meta summarizes the full payload without embedding it directly in StepRun status. - Meta SignalMeta `json:"meta,omitempty"` - // Sample carries an optional, already-sanitized sample payload when callers choose to include one. - Sample any `json:"sample,omitempty"` -} - -// TextSignalOptions controls how text payloads are summarized for signals. -type TextSignalOptions struct { - // Format overrides the logical payload format label stored in SignalMeta.Format. - Format string - // ContentType sets SignalMeta.ContentType for the emitted signal. - ContentType string - // SampleBytes caps the inline sample size in bytes when callers choose to include a text sample. - // When zero, EmitTextSignal preserves the default metadata-only behavior unless SampleExtras is set. - SampleBytes int - // IncludeHash enables SHA-256 hashing of the original text into SignalMeta.HashSHA256. - IncludeHash bool - // Attributes attaches additional safe metadata to SignalMeta.Attributes. - Attributes map[string]string - // SampleExtras adds additional sample metadata alongside the sampled text. - SampleExtras map[string]any -} - -// EmitTextSignal emits a metadata summary for text payloads. By default it keeps -// metadata-only behavior; callers can opt into bounded inline samples via -// SampleBytes or SampleExtras. The full payload should still be passed via -// outputs/storage references when downstream steps need the complete content. -func EmitTextSignal(ctx context.Context, key string, text string, opts TextSignalOptions) error { - sizeBytes := len(text) - meta := SignalMeta{ - Format: firstNonEmpty(opts.Format, "text"), - ContentType: strings.TrimSpace(opts.ContentType), - SizeBytes: sizeBytes, - Attributes: opts.Attributes, - } - if opts.IncludeHash { - sum := sha256.Sum256([]byte(text)) - meta.HashSHA256 = fmt.Sprintf("%x", sum) - } - payload := SignalEnvelope{ - Meta: meta, - Sample: buildTextSignalSample(text, opts), - } - return EmitSignal(ctx, key, payload) -} - -func firstNonEmpty(values ...string) string { - for _, value := range values { - if strings.TrimSpace(value) != "" { - return strings.TrimSpace(value) - } - } - return "" -} - -func mergeSample(base map[string]any, extras map[string]any) map[string]any { - if len(extras) == 0 { - return base - } - out := make(map[string]any, len(base)+len(extras)) - maps.Copy(out, base) - for key, value := range extras { - if _, exists := out[key]; !exists { - out[key] = value - } - } - return out -} - -func buildTextSignalSample(text string, opts TextSignalOptions) any { - if opts.SampleBytes == 0 && len(opts.SampleExtras) == 0 { - return nil - } - limit := opts.SampleBytes - if limit <= 0 { - limit = defaultSignalSampleBytes - } - sampleText, truncated := truncateUTF8StringBytes(text, limit) - sample := map[string]any{ - "text": sampleText, - } - if truncated { - sample["truncated"] = true - } - return mergeSample(sample, opts.SampleExtras) -} - -func truncateUTF8StringBytes(text string, maxBytes int) (string, bool) { - if maxBytes <= 0 || len(text) <= maxBytes { - return text, false - } - truncated := text[:maxBytes] - for len(truncated) > 0 && !utf8.ValidString(truncated) { - truncated = truncated[:len(truncated)-1] - } - return truncated, true -} diff --git a/signal_payloads_test.go b/signal_payloads_test.go deleted file mode 100644 index c367cc8..0000000 --- a/signal_payloads_test.go +++ /dev/null @@ -1,114 +0,0 @@ -package sdk - -import ( - "context" - "encoding/json" - "testing" - - "github.com/bubustack/core/contracts" -) - -func TestEmitTextSignalPreservesMetadataOnlyByDefault(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - if err := EmitTextSignal(context.Background(), "summary", "hello world", TextSignalOptions{}); err != nil { - t.Fatalf("EmitTextSignal(...) returned error: %v", err) - } - - if len(patcher.calls) != 1 { - t.Fatalf("expected one patch call, got %d", len(patcher.calls)) - } - - var envelope SignalEnvelope - if err := json.Unmarshal(patcher.calls[0].Signals["summary"].Raw, &envelope); err != nil { - t.Fatalf("unmarshal signal envelope: %v", err) - } - if envelope.Sample != nil { - t.Fatalf("expected metadata-only signal by default, got sample %#v", envelope.Sample) - } -} - -func TestEmitTextSignalIncludesBoundedSampleWhenRequested(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - if err := EmitTextSignal(context.Background(), "summary", "hello world", TextSignalOptions{ - SampleBytes: 5, - SampleExtras: map[string]any{ - "lang": "en", - }, - }); err != nil { - t.Fatalf("EmitTextSignal(...) returned error: %v", err) - } - - var envelope map[string]any - if err := json.Unmarshal(patcher.calls[0].Signals["summary"].Raw, &envelope); err != nil { - t.Fatalf("unmarshal signal envelope: %v", err) - } - sample, ok := envelope["sample"].(map[string]any) - if !ok { - t.Fatalf("expected sample object, got %#v", envelope["sample"]) - } - if sample["text"] != "hello" { - t.Fatalf("unexpected sampled text: %#v", sample["text"]) - } - if sample["truncated"] != true { - t.Fatalf("expected truncated marker, got %#v", sample["truncated"]) - } - if sample["lang"] != "en" { - t.Fatalf("expected sample extras to merge, got %#v", sample["lang"]) - } -} - -func TestEmitTextSignalUsesDefaultSampleSizeWhenOnlyExtrasRequested(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - text := make([]byte, defaultSignalSampleBytes+10) - for i := range text { - text[i] = 'a' - } - - if err := EmitTextSignal(context.Background(), "summary", string(text), TextSignalOptions{ - SampleExtras: map[string]any{"source": "generated"}, - }); err != nil { - t.Fatalf("EmitTextSignal(...) returned error: %v", err) - } - - var envelope map[string]any - if err := json.Unmarshal(patcher.calls[0].Signals["summary"].Raw, &envelope); err != nil { - t.Fatalf("unmarshal signal envelope: %v", err) - } - sample := envelope["sample"].(map[string]any) - if len(sample["text"].(string)) != defaultSignalSampleBytes { - t.Fatalf("expected default sample length %d, got %d", defaultSignalSampleBytes, len(sample["text"].(string))) - } - if sample["source"] != "generated" { - t.Fatalf("expected merged sample extras, got %#v", sample["source"]) - } -} diff --git a/signal_replay.go b/signal_replay.go deleted file mode 100644 index 1b8bc8e..0000000 --- a/signal_replay.go +++ /dev/null @@ -1,103 +0,0 @@ -package sdk - -import ( - "context" - "fmt" - "os" - "sort" - "strings" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bubu-sdk-go/k8s" - "github.com/bubustack/core/contracts" - "k8s.io/apimachinery/pkg/types" -) - -type signalReplayClient interface { - GetNamespace() string - GetStepRun(ctx context.Context, key types.NamespacedName, stepRun *runsv1alpha1.StepRun) error -} - -type sharedSignalReplayClient struct { - client *k8s.Client -} - -func (c sharedSignalReplayClient) GetNamespace() string { - if c.client == nil { - return "" - } - return c.client.GetNamespace() -} - -func (c sharedSignalReplayClient) GetStepRun( - ctx context.Context, - key types.NamespacedName, - stepRun *runsv1alpha1.StepRun, -) error { - return c.client.Get(ctx, key, stepRun) -} - -var signalReplayClientFactory = func() (signalReplayClient, error) { - client, err := k8s.SharedClient() - if err != nil { - return nil, err - } - return sharedSignalReplayClient{client: client}, nil -} - -// ReplaySignals returns the signal events for a StepRun, optionally filtered by sequence. -// When stepRunName or namespace are empty, environment defaults are used. -func ReplaySignals( - ctx context.Context, - stepRunName, - namespace string, - sinceSeq uint64) ([]runsv1alpha1.SignalEvent, - error, -) { - if ctx == nil { - ctx = context.Background() - } - stepRunName = strings.TrimSpace(stepRunName) - if stepRunName == "" { - stepRunName = strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) - } - if stepRunName == "" { - return nil, ErrSignalsUnavailable - } - namespace = strings.TrimSpace(namespace) - if namespace == "" { - namespace = strings.TrimSpace(os.Getenv(contracts.StepRunNamespaceEnv)) - } - client, err := signalReplayClientFactory() - if err != nil { - return nil, fmt.Errorf("failed to initialize signal replay client: %w", err) - } - if namespace == "" { - namespace = client.GetNamespace() - if namespace == "" { - return nil, ErrSignalsUnavailable - } - } - - var stepRun runsv1alpha1.StepRun - if err := client.GetStepRun(ctx, types.NamespacedName{Name: stepRunName, Namespace: namespace}, &stepRun); err != nil { - return nil, fmt.Errorf("failed to fetch StepRun for signal replay: %w", err) - } - - if len(stepRun.Status.SignalEvents) == 0 { - return nil, nil - } - - filtered := make([]runsv1alpha1.SignalEvent, 0, len(stepRun.Status.SignalEvents)) - for _, evt := range stepRun.Status.SignalEvents { - if sinceSeq > 0 && evt.Seq <= sinceSeq { - continue - } - filtered = append(filtered, evt) - } - if len(filtered) == 0 { - return nil, nil - } - sort.Slice(filtered, func(i, j int) bool { return filtered[i].Seq < filtered[j].Seq }) - return filtered, nil -} diff --git a/signal_replay_test.go b/signal_replay_test.go deleted file mode 100644 index c55ec97..0000000 --- a/signal_replay_test.go +++ /dev/null @@ -1,87 +0,0 @@ -package sdk - -import ( - "context" - "errors" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/core/contracts" - "github.com/stretchr/testify/require" - "k8s.io/apimachinery/pkg/types" -) - -type stubSignalReplayClient struct { - namespace string - stepRun runsv1alpha1.StepRun - getErr error - lastCtx context.Context - lastKey types.NamespacedName -} - -func (s *stubSignalReplayClient) GetNamespace() string { - return s.namespace -} - -func (s *stubSignalReplayClient) GetStepRun( - ctx context.Context, - key types.NamespacedName, - stepRun *runsv1alpha1.StepRun, -) error { - s.lastCtx = ctx - s.lastKey = key - if s.getErr != nil { - return s.getErr - } - *stepRun = s.stepRun - return nil -} - -func TestReplaySignalsReturnsUnavailableWithoutStepRunContext(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "") - t.Setenv(contracts.StepRunNamespaceEnv, "") - - prevFactory := signalReplayClientFactory - clientInitialized := false - signalReplayClientFactory = func() (signalReplayClient, error) { - clientInitialized = true - return nil, errors.New("should not initialize client") - } - t.Cleanup(func() { - signalReplayClientFactory = prevFactory - }) - - events, err := ReplaySignals(nil, "", "", 0) //nolint:staticcheck - require.ErrorIs(t, err, ErrSignalsUnavailable) - require.Nil(t, events) - require.False(t, clientInitialized, "client should not initialize when step-run identity is absent") -} - -func TestReplaySignalsAcceptsNilContextAndUsesClientNamespace(t *testing.T) { - prevFactory := signalReplayClientFactory - client := &stubSignalReplayClient{ - namespace: "signals-ns", - stepRun: runsv1alpha1.StepRun{ - Status: runsv1alpha1.StepRunStatus{ - SignalEvents: []runsv1alpha1.SignalEvent{ - {Seq: 2, Key: "later"}, - {Seq: 1, Key: "earlier"}, - }, - }, - }, - } - signalReplayClientFactory = func() (signalReplayClient, error) { - return client, nil - } - t.Cleanup(func() { - signalReplayClientFactory = prevFactory - }) - - events, err := ReplaySignals(nil, "step-1", "", 0) //nolint:staticcheck - require.NoError(t, err) - require.Len(t, events, 2) - require.NotNil(t, client.lastCtx) - require.Equal(t, types.NamespacedName{Name: "step-1", Namespace: "signals-ns"}, client.lastKey) - require.Equal(t, uint64(1), events[0].Seq) - require.Equal(t, uint64(2), events[1].Seq) -} diff --git a/signal_sequence.go b/signal_sequence.go deleted file mode 100644 index 9f08640..0000000 --- a/signal_sequence.go +++ /dev/null @@ -1,57 +0,0 @@ -package sdk - -import ( - "context" - "sync/atomic" - "time" -) - -// SequencedSignal wraps a signal payload with a monotonically increasing sequence. -type SequencedSignal struct { - // Seq is the monotonic sequence number assigned to the signal event. - Seq uint64 `json:"seq"` - // EmittedAt records when the signal was emitted in UTC. - EmittedAt time.Time `json:"emittedAt,omitempty"` - // Value carries the original signal payload associated with Seq. - Value any `json:"value,omitempty"` -} - -// SignalSequence is a process-local monotonic sequence generator for signals. -type SignalSequence struct { - counter atomic.Uint64 -} - -// NewSignalSequence constructs a SignalSequence starting at the provided value. -func NewSignalSequence(start uint64) *SignalSequence { - seq := &SignalSequence{} - if start > 0 { - seq.counter.Store(start) - } - return seq -} - -// Next increments and returns the next sequence value. -func (s *SignalSequence) Next() uint64 { - if s == nil { - return 0 - } - return s.counter.Add(1) -} - -// EmitSequencedSignal emits a signal wrapped with a sequence number and timestamp. -func EmitSequencedSignal(ctx context.Context, key string, seq uint64, value any) error { - payload := SequencedSignal{ - Seq: seq, - EmittedAt: time.Now().UTC(), - Value: value, - } - return emitSignalEvent(ctx, key, payload, seq) -} - -// EmitSignalWithSequence emits a signal using the provided sequence generator. -func EmitSignalWithSequence(ctx context.Context, key string, seq *SignalSequence, value any) error { - if seq == nil { - return EmitSignal(ctx, key, value) - } - return EmitSequencedSignal(ctx, key, seq.Next(), value) -} diff --git a/signal_sequence_test.go b/signal_sequence_test.go deleted file mode 100644 index 873d3fe..0000000 --- a/signal_sequence_test.go +++ /dev/null @@ -1,20 +0,0 @@ -package sdk - -import "testing" - -func TestSignalSequenceNext(t *testing.T) { - seq := NewSignalSequence(0) - if got := seq.Next(); got != 1 { - t.Fatalf("Next() = %d", got) - } - if got := seq.Next(); got != 2 { - t.Fatalf("Next() = %d", got) - } -} - -func TestSignalSequenceStart(t *testing.T) { - seq := NewSignalSequence(10) - if got := seq.Next(); got != 11 { - t.Fatalf("Next() = %d", got) - } -} diff --git a/signals.go b/signals.go deleted file mode 100644 index 009d031..0000000 --- a/signals.go +++ /dev/null @@ -1,266 +0,0 @@ -package sdk - -import ( - context "context" - "crypto/sha256" - "encoding/json" - "errors" - "fmt" - "os" - "reflect" - "strings" - "sync" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bubu-sdk-go/k8s" - sdkenv "github.com/bubustack/bubu-sdk-go/pkg/env" - "github.com/bubustack/core/contracts" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -// ErrSignalsUnavailable indicates that the current process cannot emit signals (e.g., -// it is not running inside a StepRun workload). Callers may treat this as a soft -// failure and continue without emitting metadata. -var ErrSignalsUnavailable = errors.New("signal emission unavailable: not running inside a StepRun") - -const ( - defaultMaxSignalPayloadBytes = 8 * 1024 - signalPatchTimeout = 3 * time.Second - signalMaxPayloadBytesEnv = "BUBU_SIGNAL_MAX_PAYLOAD_BYTES" -) - -var ( - signalEmitterMu sync.Mutex - signalEmitterInst *signalEmitter - signalEmitterErr error // only set for permanent errors (missing env) - signalClientFactory = func() (signalPatcher, error) { return k8s.SharedClient() } - defaultSignalSeq = NewSignalSequence(0) -) - -type signalPatcher interface { - PatchStepRunStatus(ctx context.Context, stepRunName string, status runsv1alpha1.StepRunStatus) error -} - -type signalEmitter struct { - client signalPatcher - stepRunID string -} - -// EmitSignal patches the current StepRun status with a small JSON payload so that -// controllers and CEL expressions can react to live metadata. When value is nil, -// the signal is cleared. The latest value stored in status.signals is always a -// compact summary so raw payloads are not persisted in StepRun status. Signal -// events are sequenced by default. -func EmitSignal(ctx context.Context, key string, value any) error { - key = strings.TrimSpace(key) - if key == "" { - return fmt.Errorf("signal key is required") - } - seq := uint64(0) - if defaultSignalSeq != nil { - seq = defaultSignalSeq.Next() - } - return emitSignalEvent(ctx, key, value, seq) -} - -func summarizeSignalForStatus(value any, raw []byte) map[string]any { - meta := summarizeSignalValue(value, raw) - meta["redacted"] = true - return meta -} - -func summarizeSignalValue(value any, raw []byte) map[string]any { - meta := map[string]any{ - "sizeBytes": len(raw), - } - if len(raw) > 0 { - sum := sha256.Sum256(raw) - meta["sha256"] = fmt.Sprintf("%x", sum) - } - kind, details := signalTypeSummary(unwrapSignalSummaryValue(value)) - meta["type"] = kind - if len(details) > 0 { - meta["details"] = details - } - return meta -} - -func unwrapSignalSummaryValue(value any) any { - switch v := value.(type) { - case SequencedSignal: - return v.Value - case *SequencedSignal: - if v == nil { - return nil - } - return v.Value - default: - return value - } -} - -func signalTypeSummary(value any) (string, map[string]any) { - if value == nil { - return "null", nil - } - v := reflect.ValueOf(value) - for v.Kind() == reflect.Pointer { - if v.IsNil() { - return "null", nil - } - v = v.Elem() - } - switch v.Kind() { - case reflect.Map: - return "map", map[string]any{"len": v.Len()} //nolint:goconst - case reflect.Slice, reflect.Array: - return "array", map[string]any{"len": v.Len()} - case reflect.String: - return "string", map[string]any{"len": v.Len()} - case reflect.Struct: - return "struct", nil - case reflect.Bool: - return "bool", nil - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return "int", nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - return "uint", nil - case reflect.Float32, reflect.Float64: - return "float", nil - default: - return v.Kind().String(), nil - } -} - -func getSignalEmitter() (*signalEmitter, error) { - signalEmitterMu.Lock() - defer signalEmitterMu.Unlock() - if signalEmitterInst != nil { - return signalEmitterInst, nil - } - if signalEmitterErr != nil { - // Permanent error (e.g. missing env) — don't retry. - return nil, signalEmitterErr - } - stepRunID := strings.TrimSpace(os.Getenv(contracts.StepRunNameEnv)) - if stepRunID == "" { - signalEmitterErr = ErrSignalsUnavailable - return nil, signalEmitterErr - } - client, err := signalClientFactory() - if err != nil { - // Transient error — don't cache, allow retry on next call. - return nil, fmt.Errorf("failed to initialize signal client: %w", err) - } - signalEmitterInst = &signalEmitter{ - client: client, - stepRunID: stepRunID, - } - return signalEmitterInst, nil -} - -func emitSignalEvent(ctx context.Context, key string, value any, seq uint64) error { - emitter, err := getSignalEmitter() - if err != nil { - return err - } - maxPayloadBytes := resolveSignalMaxPayloadBytes() - var payloadRaw []byte - var statusRaw []byte - if value != nil { - payloadRaw, err = json.Marshal(value) - if err != nil { - return fmt.Errorf("failed to marshal signal payload: %w", err) - } - statusValue := statusSignalValue(value, payloadRaw, maxPayloadBytes) - statusRaw, err = json.Marshal(statusValue) - if err != nil { - return fmt.Errorf("failed to marshal signal status summary: %w", err) - } - if len(statusRaw) > maxPayloadBytes { - return fmt.Errorf("signal summary for %s exceeds %d bytes", key, maxPayloadBytes) - } - } - return emitter.emit(ctx, key, statusRaw, value, payloadRaw, seq) -} - -func statusSignalValue(value any, raw []byte, maxPayloadBytes int) any { - switch v := value.(type) { - case SignalEnvelope: - if len(raw) <= maxPayloadBytes { - return v - } - case *SignalEnvelope: - if v != nil && len(raw) <= maxPayloadBytes { - return *v - } - case SignalMeta: - if len(raw) <= maxPayloadBytes { - return v - } - case *SignalMeta: - if v != nil && len(raw) <= maxPayloadBytes { - return *v - } - case SequencedSignal: - return statusSignalValue(v.Value, raw, maxPayloadBytes) - case *SequencedSignal: - if v != nil { - return statusSignalValue(v.Value, raw, maxPayloadBytes) - } - } - return summarizeSignalForStatus(value, raw) -} - -func resolveSignalMaxPayloadBytes() int { - return sdkenv.GetInt(signalMaxPayloadBytesEnv, defaultMaxSignalPayloadBytes) -} - -func (e *signalEmitter) emit(ctx context.Context, key string, statusRaw []byte, - signalValue any, payloadRaw []byte, seq uint64) error { - if e == nil { - return ErrSignalsUnavailable - } - if ctx == nil { - ctx = context.Background() - } - ctx, cancel := context.WithTimeout(ctx, signalPatchTimeout) - defer cancel() - value := runtime.RawExtension{Raw: statusRaw} - var eventPayload *runtime.RawExtension - if len(payloadRaw) > 0 { - eventSummaryRaw, err := json.Marshal(summarizeSignalValue(signalValue, payloadRaw)) - if err != nil { - return fmt.Errorf("failed to marshal signal event summary: %w", err) - } - eventPayload = &runtime.RawExtension{Raw: eventSummaryRaw} - } - var eventTime *metav1.Time - var events []runsv1alpha1.SignalEvent - if seq > 0 { - now := metav1.NewTime(time.Now().UTC()) - eventTime = &now - events = []runsv1alpha1.SignalEvent{{ - Seq: seq, - Key: key, - EmittedAt: eventTime, - Payload: eventPayload, - }} - } - patch := runsv1alpha1.StepRunStatus{ - Signals: map[string]runtime.RawExtension{key: value}, - SignalEvents: events, - } - return e.client.PatchStepRunStatus(ctx, e.stepRunID, patch) -} - -// testResetSignalEmitter resets the cached emitter between tests. -func testResetSignalEmitter() { - signalEmitterMu.Lock() - defer signalEmitterMu.Unlock() - signalEmitterInst = nil - signalEmitterErr = nil - defaultSignalSeq = NewSignalSequence(0) -} diff --git a/signals_test.go b/signals_test.go deleted file mode 100644 index 023429b..0000000 --- a/signals_test.go +++ /dev/null @@ -1,263 +0,0 @@ -package sdk - -import ( - "context" - "encoding/json" - "strings" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/core/contracts" -) - -type recordingSignalPatcher struct { - calls []runsv1alpha1.StepRunStatus -} - -func (r *recordingSignalPatcher) PatchStepRunStatus( //nolint:lll - _ context.Context, _ string, status runsv1alpha1.StepRunStatus, -) error { - r.calls = append(r.calls, status) - return nil -} - -func TestResolveSignalMaxPayloadBytesFromEnv(t *testing.T) { - t.Setenv(signalMaxPayloadBytesEnv, "256") - if got := resolveSignalMaxPayloadBytes(); got != 256 { - t.Fatalf("resolveSignalMaxPayloadBytes() = %d, want 256", got) - } -} - -func TestResolveSignalMaxPayloadBytesFallsBackOnInvalidEnv(t *testing.T) { - t.Setenv(signalMaxPayloadBytesEnv, "invalid") - if got := resolveSignalMaxPayloadBytes(); got != defaultMaxSignalPayloadBytes { - t.Fatalf("resolveSignalMaxPayloadBytes() = %d, want %d", got, defaultMaxSignalPayloadBytes) - } -} - -func TestEmitSignalAcceptsNilContext(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - if err := EmitSignal(nil, "state", map[string]any{"ok": true}); err != nil { //nolint:staticcheck - t.Fatalf("EmitSignal(nil, ...) returned error: %v", err) - } - if len(patcher.calls) != 1 { - t.Fatalf("expected one patch call, got %d", len(patcher.calls)) - } - - raw := patcher.calls[0].Signals["state"].Raw - var payload map[string]any - if err := json.Unmarshal(raw, &payload); err != nil { - t.Fatalf("unmarshal signal status summary: %v", err) - } - if payload["redacted"] != true { - t.Fatalf("expected redacted summary, got %#v", payload["redacted"]) - } - if payload["type"] != "map" { - t.Fatalf("unexpected summary type: %#v", payload["type"]) - } -} - -func TestEmitSequencedSignalAcceptsNilContext(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - if err := EmitSequencedSignal(nil, "state", 9, map[string]any{"ok": true}); err != nil { //nolint:staticcheck - t.Fatalf("EmitSequencedSignal(nil, ...) returned error: %v", err) - } - if len(patcher.calls) != 1 { - t.Fatalf("expected one patch call, got %d", len(patcher.calls)) - } - if len(patcher.calls[0].SignalEvents) != 1 { - t.Fatalf("expected one signal event, got %d", len(patcher.calls[0].SignalEvents)) - } - if patcher.calls[0].SignalEvents[0].Seq != 9 { - t.Fatalf("expected sequence 9, got %d", patcher.calls[0].SignalEvents[0].Seq) - } -} - -func TestEmitSequencedSignalStoresSummaryPayloadInEvents(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - if err := EmitSequencedSignal(context.Background(), "state", 7, map[string]any{ - "token": "s3cr3t", - "ok": true, - }); err != nil { - t.Fatalf("EmitSequencedSignal(...) returned error: %v", err) - } - - if len(patcher.calls) != 1 { - t.Fatalf("expected one patch call, got %d", len(patcher.calls)) - } - if len(patcher.calls[0].SignalEvents) != 1 { - t.Fatalf("expected one signal event, got %d", len(patcher.calls[0].SignalEvents)) - } - - signalRaw := patcher.calls[0].Signals["state"].Raw - if strings.Contains(string(signalRaw), "s3cr3t") { - t.Fatalf("expected latest status signal to stay summarized, got %s", signalRaw) - } - - eventPayload := patcher.calls[0].SignalEvents[0].Payload - if eventPayload == nil { - t.Fatal("expected signal event payload summary") - } - if strings.Contains(string(eventPayload.Raw), "s3cr3t") { - t.Fatalf("expected signal event summary to redact raw payload, got %s", eventPayload.Raw) - } - - var summary map[string]any - if err := json.Unmarshal(eventPayload.Raw, &summary); err != nil { - t.Fatalf("unmarshal signal event summary: %v", err) - } - if summary["type"] != "map" { - t.Fatalf("unexpected signal event summary type: %#v", summary["type"]) - } - if summary["sizeBytes"] == nil { - t.Fatalf("expected signal event summary size, got %#v", summary["sizeBytes"]) - } - if summary["sizeBytes"] == float64(len(signalRaw)) { - t.Fatalf("expected event summary size to describe the original payload, got status-summary size %#v", summary["sizeBytes"]) //nolint:lll - } - if _, ok := summary["sha256"].(string); !ok { - t.Fatalf("expected signal event summary hash, got %#v", summary["sha256"]) - } -} - -func TestEmitSignalTruncationOmitsMapKeysFromSummary(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - payload := map[string]any{ - "verySensitiveFieldName": strings.Repeat("x", defaultMaxSignalPayloadBytes), - } - if err := EmitSignal(context.Background(), "state", payload); err != nil { - t.Fatalf("EmitSignal(...) returned error: %v", err) - } - - if len(patcher.calls) != 1 { - t.Fatalf("expected one patch call, got %d", len(patcher.calls)) - } - - signalRaw := patcher.calls[0].Signals["state"].Raw - if strings.Contains(string(signalRaw), "verySensitiveFieldName") { - t.Fatalf("expected truncated signal summary to omit original map keys, got %s", signalRaw) - } - - var summary map[string]any - if err := json.Unmarshal(signalRaw, &summary); err != nil { - t.Fatalf("unmarshal truncated signal summary: %v", err) - } - if summary["redacted"] != true { - t.Fatalf("expected redacted summary marker, got %#v", summary["redacted"]) - } - details, ok := summary["details"].(map[string]any) - if !ok { - t.Fatalf("expected truncated summary details, got %#v", summary["details"]) - } - if _, exists := details["keys"]; exists { - t.Fatalf("expected truncated summary details to omit key names, got %#v", details) - } -} - -func TestEmitSignalStoresSummaryInStatusSignals(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - if err := EmitSignal(context.Background(), "state", map[string]any{ - "token": "s3cr3t", - "ok": true, - }); err != nil { - t.Fatalf("EmitSignal(...) returned error: %v", err) - } - - if len(patcher.calls) != 1 { - t.Fatalf("expected one patch call, got %d", len(patcher.calls)) - } - - signalRaw := patcher.calls[0].Signals["state"].Raw - if strings.Contains(string(signalRaw), "s3cr3t") { - t.Fatalf("expected redacted signal payload in status.signals, got %s", signalRaw) - } - - var summary map[string]any - if err := json.Unmarshal(signalRaw, &summary); err != nil { - t.Fatalf("unmarshal redacted signal summary: %v", err) - } - if summary["redacted"] != true { - t.Fatalf("expected redacted marker, got %#v", summary["redacted"]) - } - if summary["type"] != "map" { - t.Fatalf("expected map summary type, got %#v", summary["type"]) - } - if _, ok := summary["sha256"].(string); !ok { - t.Fatalf("expected summary hash, got %#v", summary["sha256"]) - } -} - -func TestEmitSignalReturnsErrorWhenSummaryExceedsConfiguredMaxPayloadBytes(t *testing.T) { - t.Setenv(contracts.StepRunNameEnv, "step-1") - t.Setenv(signalMaxPayloadBytesEnv, "32") - testResetSignalEmitter() - - prevFactory := signalClientFactory - patcher := &recordingSignalPatcher{} - signalClientFactory = func() (signalPatcher, error) { return patcher, nil } - t.Cleanup(func() { - signalClientFactory = prevFactory - testResetSignalEmitter() - }) - - err := EmitSignal(context.Background(), "state", map[string]any{"ok": true}) - if err == nil { - t.Fatal("expected EmitSignal to fail when summary exceeds configured max payload bytes") - } - if !strings.Contains(err.Error(), "signal summary for state exceeds 32 bytes") { - t.Fatalf("expected configured-limit summary error, got %v", err) - } - if len(patcher.calls) != 0 { - t.Fatalf("expected no patch calls on payload cap violation, got %d", len(patcher.calls)) - } -} diff --git a/storage/file_store.go b/storage/file_store.go new file mode 100644 index 0000000..3c15b18 --- /dev/null +++ b/storage/file_store.go @@ -0,0 +1,144 @@ +package storage + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +// FileStore implements the Store interface for a local filesystem. +type FileStore struct { + basePath string +} + +// NewFileStore creates a new FileStore. +// It requires a base path where all files will be stored. +func NewFileStore(basePath string) (*FileStore, error) { + if basePath == "" { + return nil, fmt.Errorf("file store base path cannot be empty") + } + if _, err := os.Stat(basePath); os.IsNotExist(err) { + return nil, fmt.Errorf("storage path '%s' provided but does not exist", basePath) + } + return &FileStore{basePath: basePath}, nil +} + +// contextReader wraps an io.Reader and returns ctx.Err() when the context is canceled. +type contextReader struct { + ctx context.Context + r io.Reader +} + +// Read implements io.Reader with context cancellation support. +func (cr *contextReader) Read(p []byte) (int, error) { + select { + case <-cr.ctx.Done(): + return 0, cr.ctx.Err() + default: + return cr.r.Read(p) + } +} + +// contextWriter wraps an io.Writer and returns ctx.Err() when the context is canceled. +type contextWriter struct { + ctx context.Context + w io.Writer +} + +// Write implements io.Writer with context cancellation support. +func (cw *contextWriter) Write(p []byte) (int, error) { + select { + case <-cw.ctx.Done(): + return 0, cw.ctx.Err() + default: + return cw.w.Write(p) + } +} + +// Write saves data from a reader to a file at the given path relative to the basePath. +func (fs *FileStore) Write(ctx context.Context, path string, reader io.Reader) error { + if err := ctx.Err(); err != nil { + return err + } + // Validate that joined path is within basePath to prevent path traversal + fullPath := filepath.Join(fs.basePath, path) + relPath, err := filepath.Rel(fs.basePath, fullPath) + if err != nil || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) || filepath.IsAbs(relPath) { + return fmt.Errorf("invalid storage path: path traversal detected (requested '%s', resolved to '%s')", path, fullPath) + } + // Additional safety: ensure fullPath is still within basePath after Clean + if !strings.HasPrefix(filepath.Clean(fullPath), filepath.Clean(fs.basePath)) { + return fmt.Errorf("invalid storage path: resolved path '%s' escapes base path '%s'", fullPath, fs.basePath) + } + if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil { + return fmt.Errorf("failed to create directory '%s' for file store: %w", filepath.Dir(fullPath), err) + } + + // Write to a temporary file first to ensure atomicity. + tempFile, err := os.CreateTemp(filepath.Dir(fullPath), ".tmp-"+filepath.Base(path)+"-") + if err != nil { + return fmt.Errorf("failed to create temporary file for writing: %w", err) + } + defer func() { + _ = tempFile.Close() + _ = os.Remove(tempFile.Name()) // Clean up temp file on exit. + }() + + // Wrap reader to be context-aware so cancellation interrupts long writes + cr := &contextReader{ctx: ctx, r: reader} + if _, err = io.Copy(tempFile, cr); err != nil { + return fmt.Errorf("failed to write to temporary file '%s': %w", tempFile.Name(), err) + } + + // Close the file before renaming. + if err := tempFile.Close(); err != nil { + return fmt.Errorf("failed to close temporary file '%s': %w", tempFile.Name(), err) + } + + if err := ctx.Err(); err != nil { + return err + } + + // Atomically rename the temporary file to the final destination. + if err := os.Rename(tempFile.Name(), fullPath); err != nil { + return fmt.Errorf("failed to atomically move temporary file to '%s': %w", fullPath, err) + } + + return nil +} + +// Read retrieves data from a file and writes it to a writer. +func (fs *FileStore) Read(ctx context.Context, path string, writer io.Writer) error { + if err := ctx.Err(); err != nil { + return err + } + // Validate that joined path is within basePath to prevent path traversal + fullPath := filepath.Join(fs.basePath, path) + relPath, err := filepath.Rel(fs.basePath, fullPath) + if err != nil || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) || filepath.IsAbs(relPath) { + return fmt.Errorf("invalid storage path: path traversal detected (requested '%s', resolved to '%s')", path, fullPath) + } + // Additional safety: ensure fullPath is still within basePath after Clean + if !strings.HasPrefix(filepath.Clean(fullPath), filepath.Clean(fs.basePath)) { + return fmt.Errorf("invalid storage path: resolved path '%s' escapes base path '%s'", fullPath, fs.basePath) + } + + file, err := os.Open(fullPath) + if err != nil { + return fmt.Errorf("failed to open file '%s' for reading: %w", fullPath, err) + } + defer func() { + err = file.Close() + if err != nil { + fmt.Printf("Close() error = %v\n", err) + } + }() + + // Wrap writer to be context-aware so cancellation interrupts long reads + cw := &contextWriter{ctx: ctx, w: writer} + _, err = io.Copy(cw, file) + return err +} diff --git a/storage/file_store_test.go b/storage/file_store_test.go new file mode 100644 index 0000000..100c213 --- /dev/null +++ b/storage/file_store_test.go @@ -0,0 +1,207 @@ +package storage + +import ( + "bytes" + "context" + "os" + "path/filepath" + "testing" +) + +func TestNewFileStore(t *testing.T) { + // Create a temporary directory for testing + tmpDir := t.TempDir() + + tests := []struct { + name string + basePath string + wantErr bool + }{ + { + name: "valid base path", + basePath: tmpDir, + wantErr: false, + }, + { + name: "empty base path", + basePath: "", + wantErr: true, + }, + { + name: "non-existent path", + basePath: filepath.Join(tmpDir, "does-not-exist"), + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := NewFileStore(tt.basePath) + if (err != nil) != tt.wantErr { + t.Errorf("NewFileStore() error = %v, wantErr %v", err, tt.wantErr) + return + } + if err == nil && got == nil { + t.Error("NewFileStore() returned nil without error") + } + if err == nil && got.basePath != tt.basePath { + t.Errorf("NewFileStore() basePath = %v, want %v", got.basePath, tt.basePath) + } + }) + } +} + +func TestFileStore_Write(t *testing.T) { + tmpDir := t.TempDir() + fs, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("NewFileStore() error = %v", err) + } + + ctx := context.Background() + testData := []byte("test content") + testPath := "test/file.txt" + + err = fs.Write(ctx, testPath, bytes.NewReader(testData)) + if err != nil { + t.Errorf("Write() error = %v", err) + } + + // Verify the file was created + fullPath := filepath.Join(tmpDir, testPath) + if _, err := os.Stat(fullPath); os.IsNotExist(err) { + t.Error("Write() did not create file") + } + + // Verify the content + content, err := os.ReadFile(fullPath) + if err != nil { + t.Fatalf("Failed to read written file: %v", err) + } + if !bytes.Equal(content, testData) { + t.Errorf("Write() wrote %v, want %v", content, testData) + } +} + +func TestFileStore_Write_CreatesDirectories(t *testing.T) { + tmpDir := t.TempDir() + fs, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("NewFileStore() error = %v", err) + } + + ctx := context.Background() + testData := []byte("nested content") + testPath := "deep/nested/path/file.txt" + + err = fs.Write(ctx, testPath, bytes.NewReader(testData)) + if err != nil { + t.Errorf("Write() error = %v", err) + } + + // Verify the nested directories were created + fullPath := filepath.Join(tmpDir, testPath) + if _, err := os.Stat(fullPath); os.IsNotExist(err) { + t.Error("Write() did not create nested directories") + } +} + +func TestFileStore_Read(t *testing.T) { + tmpDir := t.TempDir() + fs, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("NewFileStore() error = %v", err) + } + + ctx := context.Background() + testData := []byte("test content for reading") + testPath := "read/test.txt" + + // First write a file + if err := fs.Write(ctx, testPath, bytes.NewReader(testData)); err != nil { + t.Fatalf("Setup Write() error = %v", err) + } + + // Now read it back + var buf bytes.Buffer + err = fs.Read(ctx, testPath, &buf) + if err != nil { + t.Errorf("Read() error = %v", err) + } + + if !bytes.Equal(buf.Bytes(), testData) { + t.Errorf("Read() got %v, want %v", buf.Bytes(), testData) + } +} + +func TestFileStore_Read_NonExistent(t *testing.T) { + tmpDir := t.TempDir() + fs, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("NewFileStore() error = %v", err) + } + + ctx := context.Background() + var buf bytes.Buffer + + err = fs.Read(ctx, "does-not-exist.txt", &buf) + if err == nil { + t.Error("Read() should return error for non-existent file") + } +} + +func TestFileStore_RoundTrip(t *testing.T) { + tmpDir := t.TempDir() + fs, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("NewFileStore() error = %v", err) + } + + ctx := context.Background() + testCases := []struct { + name string + data []byte + path string + }{ + { + name: "simple text", + data: []byte("hello world"), + path: "simple.txt", + }, + { + name: "binary data", + data: []byte{0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}, + path: "binary.dat", + }, + { + name: "large content", + data: bytes.Repeat([]byte("a"), 10000), + path: "large.txt", + }, + { + name: "empty file", + data: []byte{}, + path: "empty.txt", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Write + if err := fs.Write(ctx, tc.path, bytes.NewReader(tc.data)); err != nil { + t.Fatalf("Write() error = %v", err) + } + + // Read + var buf bytes.Buffer + if err := fs.Read(ctx, tc.path, &buf); err != nil { + t.Fatalf("Read() error = %v", err) + } + + // Verify + if !bytes.Equal(buf.Bytes(), tc.data) { + t.Errorf("Round trip failed: got %d bytes, want %d bytes", len(buf.Bytes()), len(tc.data)) + } + }) + } +} diff --git a/storage/manager.go b/storage/manager.go new file mode 100644 index 0000000..dbc888e --- /dev/null +++ b/storage/manager.go @@ -0,0 +1,619 @@ +package storage + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "runtime" + "strconv" + "strings" + "time" + + "github.com/bubustack/bubu-sdk-go/pkg/metrics" +) + +const ( + // DefaultMaxInlineSize is the threshold in bytes above which data is offloaded to storage. + // + // Aligns with operator's DefaultMaxInlineSize (bobrapet/internal/config/controller_config.go:246). + // Override via BUBU_MAX_INLINE_SIZE. Keeping this low (1 KiB) prevents apiserver/etcd overload + // while allowing small outputs to remain inline for fast access. + DefaultMaxInlineSize = 1 * 1024 // 1 KiB + + // DefaultMaxRecursionDepth is the maximum nesting depth for hydration/dehydration. + // + // Prevents stack overflow on deeply nested data structures. Override via BUBU_MAX_RECURSION_DEPTH. + DefaultMaxRecursionDepth = 10 + + // DefaultStorageTimeout provides ample time for large file uploads (e.g., 100MB at 1MB/s = 100s + overhead). + // + // Operators should tune this based on expected output sizes and S3 latency: + // timeout >= (max_output_mb / upload_bandwidth_mbps) * 1.5 + baseline_latency_sec + // Override via BUBU_STORAGE_TIMEOUT. + DefaultStorageTimeout = 300 * time.Second // 5min for storage ops + + storageRefKey = "$bubuStorageRef" +) + +// getMaxInlineSize returns the max inline size from env or default +func getMaxInlineSize() (int, error) { + if v := os.Getenv("BUBU_MAX_INLINE_SIZE"); v != "" { + val, err := strconv.Atoi(v) + if err != nil { + return 0, fmt.Errorf("invalid BUBU_MAX_INLINE_SIZE '%s': must be a positive integer", v) + } + if val <= 0 { + return 0, fmt.Errorf("invalid BUBU_MAX_INLINE_SIZE '%s': must be > 0", v) + } + return val, nil + } + return DefaultMaxInlineSize, nil +} + +// getMaxRecursionDepth returns the max recursion depth from env or default +func getMaxRecursionDepth() int { + if v := os.Getenv("BUBU_MAX_RECURSION_DEPTH"); v != "" { + if i, err := strconv.Atoi(v); err == nil && i > 0 { + return i + } + } + return DefaultMaxRecursionDepth +} + +// getStorageTimeout returns the timeout for storage operations from env or default +func getStorageTimeout() time.Duration { + if v := os.Getenv("BUBU_STORAGE_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return DefaultStorageTimeout +} + +// StoredObject is a wrapper for data offloaded to storage, providing metadata +// about the content type to ensure correct hydration. +type StoredObject struct { + // ContentType indicates whether the stored data is "json" or "raw" text. + ContentType string `json:"contentType"` + // Data holds the actual content. For JSON, it's a RawMessage; for raw, it's a string. + Data json.RawMessage `json:"data"` +} + +// StorageManager handles the transparent offloading of large inputs and outputs +// to a configured storage backend. +type StorageManager struct { + store Store + maxInlineSize int + outputPrefix string + inputPrefix string +} + +// Context helpers for metrics attribution +type stepRunIDKey struct{} + +// WithStepRunID attaches a StepRunID to the context for hydration metrics attribution. +func WithStepRunID(ctx context.Context, stepRunID string) context.Context { + if stepRunID == "" { + return ctx + } + return context.WithValue(ctx, stepRunIDKey{}, stepRunID) +} + +func stepRunIDFromContext(ctx context.Context) string { + if ctx == nil { + return "" + } + if v, ok := ctx.Value(stepRunIDKey{}).(string); ok { + return v + } + return "" +} + +// GetStore returns the underlying Store implementation used by this manager. +// This is primarily useful for testing and debugging. +func (sm *StorageManager) GetStore() Store { + return sm.store +} + +// NewManager creates a new StorageManager, automatically configuring the +// storage backend based on environment variables. +func NewManager(ctx context.Context) (*StorageManager, error) { + maxSize, err := getMaxInlineSize() + if err != nil { + return nil, err + } + + outputPrefix := "outputs" + if prefix := os.Getenv("BUBU_STORAGE_OUTPUT_PREFIX"); prefix != "" { + outputPrefix = prefix + } + inputPrefix := "inputs" + if prefix := os.Getenv("BUBU_STORAGE_INPUT_PREFIX"); prefix != "" { + inputPrefix = prefix + } + + var store Store + provider := os.Getenv("BUBU_STORAGE_PROVIDER") + + switch provider { + case "s3": + s3Store, err := NewS3Store(ctx) + if err != nil { + return nil, fmt.Errorf("failed to initialize S3 storage: %w", err) + } + store = s3Store + case "file": + basePath := os.Getenv("BUBU_STORAGE_PATH") + if basePath == "" { + return nil, fmt.Errorf("BUBU_STORAGE_PROVIDER is set to 'file' but BUBU_STORAGE_PATH is not set") + } + fileStore, err := NewFileStore(basePath) + if err != nil { + return nil, fmt.Errorf("failed to initialize file storage: %w", err) + } + store = fileStore + case "", "none": + // Disabled or not set + store = nil + default: + return nil, fmt.Errorf("unknown BUBU_STORAGE_PROVIDER '%s' (expected 's3', 'file', or 'none')", provider) + } + + return &StorageManager{ + store: store, + maxInlineSize: maxSize, + outputPrefix: outputPrefix, + inputPrefix: inputPrefix, + }, nil +} + +// Hydrate recursively scans a data structure for storage references and replaces +// them with the actual content from the storage backend. +func (sm *StorageManager) Hydrate(ctx context.Context, data any) (any, error) { + if sm.store == nil { + // When storage is disabled, pass through inputs unchanged unless they contain + // a storage reference which we cannot hydrate without a backend. + if containsStorageRef(data, 0) { + return nil, fmt.Errorf( + "found storage reference in inputs but storage is disabled " + + "(set BUBU_STORAGE_PROVIDER=s3 or file)", + ) + } + // This maintains backward compatibility with existing tests and behavior. + return data, nil + } + + // Enforce timeout on storage operations to prevent indefinite hangs + timeout := getStorageTimeout() + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + result, err := sm.hydrateValue(ctx, data, 0) + if err == nil && result != nil { + // Record hydration size for observability + size := estimateSize(result) + metrics.RecordHydrationSize(ctx, int64(size), stepRunIDFromContext(ctx)) + } + return result, err +} + +// DehydrateInputs dehydrates StoryRun inputs. +func (sm *StorageManager) DehydrateInputs(ctx context.Context, data any, storyRunID string) (any, error) { + if sm.store == nil { + if data != nil { + size := estimateSize(data) + if size > sm.maxInlineSize { + return nil, fmt.Errorf( + "output size %d bytes exceeds inline limit %d and storage is disabled; "+ + "either enable storage (BUBU_STORAGE_PROVIDER=s3|file), "+ + "increase limit (BUBU_MAX_INLINE_SIZE=%d or higher), "+ + "or reduce output size", + size, sm.maxInlineSize, size, + ) + } + } + return data, nil + } + + timeout := getStorageTimeout() + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + result, err := sm.dehydrateValue(ctx, data, storyRunID, "", 0, sm.inputPrefix) + if err == nil && data != nil { + size := estimateSize(data) + metrics.RecordDehydrationSize(ctx, int64(size), storyRunID) + } + return result, err +} + +// Dehydrate recursively checks the size of a data structure. If it exceeds the inline +// size limit, it saves the data to the storage backend and replaces it with a +// storage reference. +func (sm *StorageManager) Dehydrate(ctx context.Context, data any, stepRunID string) (any, error) { + if sm.store == nil { + // When storage is disabled, prevent dangerously large payloads from being inlined + // into the StepRun status, which could exceed apiserver/etcd limits. + if data != nil { + size := estimateSize(data) + if size > sm.maxInlineSize { + return nil, fmt.Errorf("output size %d bytes exceeds inline limit %d and storage is disabled; "+ + "either enable storage (BUBU_STORAGE_PROVIDER=s3|file), "+ + "increase limit (BUBU_MAX_INLINE_SIZE=%d or higher), "+ + "or reduce output size", + size, sm.maxInlineSize, size, + ) + } + } + // Otherwise, pass through outputs unchanged. Responsibility for size management + // lies with cluster policy in this mode. + return data, nil + } + + // Enforce timeout on storage operations to prevent indefinite hangs + timeout := getStorageTimeout() + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + result, err := sm.dehydrateValue(ctx, data, stepRunID, "", 0, sm.outputPrefix) + if err == nil && data != nil { + // Record dehydration size for observability + size := estimateSize(data) + metrics.RecordDehydrationSize(ctx, int64(size), stepRunID) + } + return result, err +} + +// validateStorageRef checks if a storage reference path is safe to use. +// It rejects: +// - Absolute paths (e.g., /etc/passwd, C:\secrets\) +// - Path traversal attempts (e.g., ../, ..\, or embedded .. components) +// This provides defense-in-depth against path traversal attacks from malicious +// or compromised upstream engrams, even if storage backend should enforce isolation. +func validateStorageRef(refPath string) error { + if refPath == "" { + return fmt.Errorf("storage reference path is empty") + } + + // Reject absolute paths + if filepath.IsAbs(refPath) { + return fmt.Errorf("invalid storage reference: absolute paths not allowed (got '%s')", refPath) + } + + // Check for path traversal using platform-independent approach + // filepath.Clean normalizes the path, then we verify it's still relative and doesn't escape + cleanPath := filepath.Clean(refPath) + + // After cleaning, check if path tries to escape (starts with ..) + if len(cleanPath) >= 2 && cleanPath[0] == '.' && cleanPath[1] == '.' { + return fmt.Errorf( + "invalid storage reference: path traversal detected (got '%s', normalized to '%s')", + refPath, cleanPath, + ) + } + + // Also check for ../ or ..\ anywhere in the original path to catch obfuscation attempts + // (e.g., "foo/../bar" would normalize to "bar" but indicates traversal intent) + if strings.Contains(refPath, ".."+string(filepath.Separator)) || + strings.Contains(refPath, ".."+string('/')) || // Unix separator + (runtime.GOOS == "windows" && strings.Contains(refPath, ".."+string('\\'))) { // Windows separator + return fmt.Errorf("invalid storage reference: path traversal detected (got '%s')", refPath) + } + + return nil +} + +func (sm *StorageManager) hydrateValue(ctx context.Context, value any, depth int) (any, error) { + maxDepth := getMaxRecursionDepth() + if depth > maxDepth { + return nil, fmt.Errorf("maximum recursion depth (%d) exceeded during hydration", maxDepth) + } + switch v := value.(type) { + case map[string]any: + return sm.hydrateMap(ctx, v, depth) + case []any: + return sm.hydrateSlice(ctx, v, depth) + default: + return value, nil + } +} + +func (sm *StorageManager) hydrateMap(ctx context.Context, m map[string]any, depth int) (any, error) { + if refPath, ok := singleStorageRef(m); ok { + return sm.hydrateFromStorageRef(ctx, refPath) + } + hydratedMap := make(map[string]any) + for k, val := range m { + hv, err := sm.hydrateValue(ctx, val, depth+1) + if err != nil { + return nil, err + } + hydratedMap[k] = hv + } + return hydratedMap, nil +} + +func (sm *StorageManager) hydrateSlice(ctx context.Context, s []any, depth int) (any, error) { + hydrated := make([]any, len(s)) + for i, item := range s { + hv, err := sm.hydrateValue(ctx, item, depth+1) + if err != nil { + return nil, err + } + hydrated[i] = hv + } + return hydrated, nil +} + +func singleStorageRef(m map[string]any) (string, bool) { + if ref, exists := m[storageRefKey]; exists && len(m) == 1 { + if refPath, ok := ref.(string); ok { + return refPath, true + } + } + return "", false +} + +func (sm *StorageManager) hydrateFromStorageRef(ctx context.Context, refPath string) (any, error) { + if err := validateStorageRef(refPath); err != nil { + return nil, fmt.Errorf("failed to read offloaded data from '%s': %w", refPath, err) + } + var buf bytes.Buffer + if err := sm.store.Read(ctx, refPath, &buf); err != nil { + return nil, fmt.Errorf("failed to read offloaded data from '%s': %w", refPath, err) + } + var storedObj StoredObject + if err := json.Unmarshal(buf.Bytes(), &storedObj); err != nil { + return nil, fmt.Errorf( + "failed to unmarshal stored object from '%s', content may be malformed: %w", + refPath, err, + ) + } + return hydrateStoredObject(storedObj, refPath) +} + +func hydrateStoredObject(obj StoredObject, refPath string) (any, error) { + switch obj.ContentType { + case "json": + var loaded any + if err := json.Unmarshal(obj.Data, &loaded); err != nil { + return nil, fmt.Errorf("failed to unmarshal hydrated JSON data from '%s': %w", refPath, err) + } + return loaded, nil + case "raw": + var raw string + if err := json.Unmarshal(obj.Data, &raw); err != nil { + return nil, fmt.Errorf("failed to unmarshal hydrated raw content from '%s': %w", refPath, err) + } + return raw, nil + default: + return nil, fmt.Errorf("unknown content type '%s' in stored object '%s'", obj.ContentType, refPath) + } +} + +func (sm *StorageManager) dehydrateValue( + ctx context.Context, + value any, + id, keyPrefix string, + depth int, + pathPrefix string, +) (any, error) { + maxDepth := getMaxRecursionDepth() + if depth > maxDepth { + return nil, fmt.Errorf("maximum recursion depth (%d) exceeded during dehydration", maxDepth) + } + switch v := value.(type) { + case map[string]any: + return sm.dehydrateMap(ctx, v, id, keyPrefix, depth, pathPrefix) + case []any: + return sm.dehydrateSlice(ctx, v, id, keyPrefix, depth, pathPrefix) + case string: + return sm.dehydrateString(ctx, v, id, keyPrefix, pathPrefix) + default: + return sm.dehydrateOther(ctx, v, id, keyPrefix, pathPrefix) + } +} + +func (sm *StorageManager) dehydrateMap( + ctx context.Context, + m map[string]any, + id, keyPrefix string, + depth int, + pathPrefix string, +) (any, error) { + dehydratedMap := make(map[string]any) + for k, val := range m { + newPrefix := k + if keyPrefix != "" { + newPrefix = fmt.Sprintf("%s.%s", keyPrefix, k) + } + dv, err := sm.dehydrateValue(ctx, val, id, newPrefix, depth+1, pathPrefix) + if err != nil { + return nil, err + } + dehydratedMap[k] = dv + } + return dehydratedMap, nil +} + +func (sm *StorageManager) dehydrateSlice( + ctx context.Context, + s []any, + id, keyPrefix string, + depth int, + pathPrefix string, +) (any, error) { + dehydrated := make([]any, len(s)) + hadOffload := false + for i, item := range s { + idx := strconv.Itoa(i) + if keyPrefix != "" { + idx = fmt.Sprintf("%s.%s", keyPrefix, idx) + } + dv, err := sm.dehydrateValue(ctx, item, id, idx, depth+1, pathPrefix) + if err != nil { + return nil, err + } + if m, ok := dv.(map[string]any); ok && len(m) == 1 { + if _, ok := m[storageRefKey]; ok { + hadOffload = true + } + } + dehydrated[i] = dv + } + if !hadOffload { + encodedSlice, err := json.Marshal(s) + if err != nil { + return nil, fmt.Errorf("failed to marshal slice for size check: %w", err) + } + if len(encodedSlice) > sm.maxInlineSize { + return sm.offloadValue(ctx, encodedSlice, "json", id, keyPrefix, pathPrefix) + } + } + return dehydrated, nil +} + +func (sm *StorageManager) dehydrateString( + ctx context.Context, + v string, + id, keyPrefix, pathPrefix string, +) (any, error) { + if len(v) <= sm.maxInlineSize { + return v, nil + } + marshaledString, err := json.Marshal(v) + if err != nil { + return nil, fmt.Errorf("failed to marshal raw string for storage: %w", err) + } + return sm.offloadValue(ctx, marshaledString, "raw", id, keyPrefix, pathPrefix) +} + +func (sm *StorageManager) dehydrateOther( + ctx context.Context, + v any, + id, keyPrefix, pathPrefix string, +) (any, error) { + encoded, err := json.Marshal(v) + if err != nil { + return nil, fmt.Errorf("failed to marshal value for size check: %w", err) + } + if len(encoded) <= sm.maxInlineSize { + return v, nil + } + return sm.offloadValue(ctx, encoded, "json", id, keyPrefix, pathPrefix) +} + +// offloadValue handles the process of wrapping a value in a StoredObject, +// writing it to the storage backend, and returning a storage reference. +// Uses io.Pipe to stream JSON encoding directly to storage without intermediate buffering. +func (sm *StorageManager) offloadValue( + ctx context.Context, + rawData []byte, + contentType, + id, + keyPrefix, + pathPrefix string, +) (any, error) { + storedObj := StoredObject{ + ContentType: contentType, + Data: json.RawMessage(rawData), + } + + fileName := fmt.Sprintf("%s.json", keyPrefix) + if keyPrefix == "" { + fileName = "output.json" + } + filePath := filepath.Join(pathPrefix, id, fileName) + + // Validate constructed path before writing (defense-in-depth; matches hydration path) + if err := validateStorageRef(filePath); err != nil { + return nil, fmt.Errorf("invalid storage path for offload: %w", err) + } + + // Stream JSON encoding to storage via pipe to avoid buffering in memory + pr, pw := io.Pipe() + encErrCh := make(chan error, 1) + wrErrCh := make(chan error, 1) + + // Encoder goroutine + go func() { + defer func() { _ = pw.Close() }() + encoder := json.NewEncoder(pw) + encErrCh <- encoder.Encode(storedObj) + }() + + // Writer goroutine + go func() { + wrErrCh <- sm.store.Write(ctx, filePath, pr) + }() + + // Wait for both to complete; ensure the pipe writer is closed on error to avoid deadlocks + var encErr, wrErr error + for i := 0; i < 2; i++ { + select { + case encErr = <-encErrCh: + if encErr != nil { + _ = pw.CloseWithError(encErr) + } + case wrErr = <-wrErrCh: + if wrErr != nil { + _ = pw.CloseWithError(wrErr) + } + } + } + + if encErr != nil { + return nil, fmt.Errorf("failed to encode stored object: %w", encErr) + } + if wrErr != nil { + return nil, fmt.Errorf("failed to write offloaded value for key '%s': %w", keyPrefix, wrErr) + } + + // Return the storage reference in place of the large value. + return map[string]any{storageRefKey: filePath}, nil +} + +// estimateSize estimates the JSON size of a value for metrics. +// This is a rough estimate and may not be exact. +func estimateSize(v any) int { + b, err := json.Marshal(v) + if err != nil { + return 0 + } + return len(b) +} + +// containsStorageRef walks the provided value looking for a map that is exactly a +// single key equal to the storageRefKey. It protects against deep recursion. +func containsStorageRef(value any, depth int) bool { + maxDepth := getMaxRecursionDepth() + if depth > maxDepth { + return false + } + switch t := value.(type) { + case map[string]any: + if ref, ok := t[storageRefKey]; ok && len(t) == 1 { + _, isString := ref.(string) + return isString + } + for _, v := range t { + if containsStorageRef(v, depth+1) { + return true + } + } + return false + case []any: + for _, item := range t { + if containsStorageRef(item, depth+1) { + return true + } + } + return false + default: + return false + } +} diff --git a/storage/manager_fuzz_test.go b/storage/manager_fuzz_test.go new file mode 100644 index 0000000..0a378ae --- /dev/null +++ b/storage/manager_fuzz_test.go @@ -0,0 +1,78 @@ +//go:build go1.18 +// +build go1.18 + +package storage + +import ( + "context" + "encoding/json" + "strings" + "testing" +) + +func FuzzHydrateValue(f *testing.F) { + // Seed corpus with various structures + f.Add(`{"key":"value"}`) + f.Add(`{"$bubuStorageRef":"test/path.json"}`) + f.Add(`{"nested":{"deep":{"very":{"much":"data"}}}}`) + f.Add(`[1,2,3,4,5]`) + f.Add(`{"array":[{"nested":"data"},{"more":"stuff"}]}`) + f.Add(`{"a":{"b":{"c":{"d":{"e":{"f":{"g":{"h":"deep"}}}}}}}}`) + + f.Fuzz(func(t *testing.T, jsonStr string) { + var data interface{} + if err := json.Unmarshal([]byte(jsonStr), &data); err != nil { + t.Skip("invalid JSON") + } + + // Create manager with mock store + mock := newMockStore() + // Add some test data to the mock + testData := StoredObject{ + ContentType: "json", + Data: json.RawMessage(`{"test":"data"}`), + } + dataBytes, _ := json.Marshal(testData) + mock.data["test/path.json"] = dataBytes + + sm := &StorageManager{ + store: mock, + maxInlineSize: 100, + } + + ctx := context.Background() + + // Should not panic, even with deeply nested or malformed data + _, err := sm.hydrateValue(ctx, data, 0) + // Error is acceptable (e.g., max depth exceeded), but no panic + _ = err + }) +} + +func FuzzDehydrateValue(f *testing.F) { + // Seed corpus + f.Add(`{"key":"value"}`) + f.Add(`{"large":"` + strings.Repeat("x", 200) + `"}`) + f.Add(`{"nested":{"a":{"b":{"c":"deep"}}}}`) + f.Add(`{"array":["item1","item2","item3"]}`) + f.Add(`[1,2,3,4,5,6,7,8,9,10]`) + + f.Fuzz(func(t *testing.T, jsonStr string) { + var data interface{} + if err := json.Unmarshal([]byte(jsonStr), &data); err != nil { + t.Skip("invalid JSON") + } + + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 50, // Small to trigger offloading + } + + ctx := context.Background() + + // Should not panic + _, err := sm.dehydrateValue(ctx, data, "test-run", "", 0, "") + _ = err + }) +} diff --git a/storage/manager_test.go b/storage/manager_test.go new file mode 100644 index 0000000..12edc3f --- /dev/null +++ b/storage/manager_test.go @@ -0,0 +1,595 @@ +package storage + +import ( + "bytes" + "context" + "encoding/json" + "os" + "reflect" + "strings" + "testing" +) + +func TestStorageManager_Hydrate(t *testing.T) { + ctx := context.Background() + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 100, + } + + // Setup: Store some data in the mock + storedData := StoredObject{ + ContentType: "json", + Data: json.RawMessage(`{"nested":"value"}`), + } + dataBytes, _ := json.Marshal(storedData) + mock.data["test/path.json"] = dataBytes + + tests := []struct { + name string + input any + want any + wantErr bool + }{ + { + name: "hydrate storage reference", + input: map[string]any{ + "$bubuStorageRef": "test/path.json", + }, + want: map[string]any{ + "nested": "value", + }, + wantErr: false, + }, + { + name: "pass through regular map", + input: map[string]any{ + "key": "value", + }, + want: map[string]any{ + "key": "value", + }, + wantErr: false, + }, + { + name: "hydrate nested structure", + input: map[string]any{ + "outer": map[string]any{ + "$bubuStorageRef": "test/path.json", + }, + }, + want: map[string]any{ + "outer": map[string]any{ + "nested": "value", + }, + }, + wantErr: false, + }, + { + name: "simple string", + input: "just a string", + want: "just a string", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := sm.Hydrate(ctx, tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("Hydrate() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Hydrate() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestStorageManager_Hydrate_RawContent(t *testing.T) { + ctx := context.Background() + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 100, + } + + // Store raw string content + rawStored := StoredObject{ + ContentType: "raw", + Data: json.RawMessage(`"This is raw text content"`), + } + dataBytes, _ := json.Marshal(rawStored) + mock.data["raw/content.json"] = dataBytes + + input := map[string]any{ + "$bubuStorageRef": "raw/content.json", + } + + got, err := sm.Hydrate(ctx, input) + if err != nil { + t.Fatalf("Hydrate() error = %v", err) + } + + expected := "This is raw text content" + if got != expected { + t.Errorf("Hydrate() = %v, want %v", got, expected) + } +} + +func TestStorageManager_Hydrate_NoStore(t *testing.T) { + ctx := context.Background() + sm := &StorageManager{ + store: nil, + maxInlineSize: 100, + } + + t.Run("error on storage reference", func(t *testing.T) { + input := map[string]any{ + "$bubuStorageRef": "test/path.json", + } + + // Should error when encountering storage ref with no backend + _, err := sm.Hydrate(ctx, input) + if err == nil { + t.Fatal("Hydrate() should error when storage reference found but storage is disabled") + } + if !strings.Contains(err.Error(), "storage reference") { + t.Errorf("Expected error about storage reference, got: %v", err) + } + }) + + t.Run("pass through normal data", func(t *testing.T) { + input := map[string]any{ + "normalField": "value", + "nested": map[string]any{ + "data": 123, + }, + } + + // Should pass through unchanged when no storage refs present + got, err := sm.Hydrate(ctx, input) + if err != nil { + t.Fatalf("Hydrate() unexpected error = %v", err) + } + + if !reflect.DeepEqual(got, input) { + t.Errorf("Hydrate() should pass through unchanged when no storage refs") + } + }) +} + +func TestStorageManager_Dehydrate(t *testing.T) { + ctx := context.Background() + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 50, // Small size to trigger offloading + } + + tests := []struct { + name string + input any + stepRunID string + wantOffload bool + wantWritten int + }{ + { + name: "small string stays inline", + input: "small", + stepRunID: "run-1", + wantOffload: false, + wantWritten: 0, + }, + { + name: "large string gets offloaded", + input: "This is a very long string that definitely exceeds the inline size limit and should be offloaded", + stepRunID: "run-2", + wantOffload: true, + wantWritten: 1, + }, + { + name: "small map stays inline", + input: map[string]any{ + "key": "value", + }, + stepRunID: "run-3", + wantOffload: false, + wantWritten: 0, + }, + { + name: "large array gets offloaded", + input: []any{ + "element1", "element2", "element3", "element4", "element5", + "element6", "element7", "element8", "element9", "element10", + }, + stepRunID: "run-4", + wantOffload: true, + wantWritten: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mock.writes = 0 // Reset counter + got, err := sm.Dehydrate(ctx, tt.input, tt.stepRunID) + if err != nil { + t.Errorf("Dehydrate() error = %v", err) + return + } + + if tt.wantOffload { + // Check if result is a storage reference + if refMap, ok := got.(map[string]any); ok { + if _, hasRef := refMap["$bubuStorageRef"]; !hasRef { + t.Error("Expected storage reference in result") + } + } else { + t.Error("Expected map with storage reference") + } + } + + if mock.writes != tt.wantWritten { + t.Errorf("Expected %d writes, got %d", tt.wantWritten, mock.writes) + } + }) + } +} + +func TestStorageManager_Dehydrate_NoStore(t *testing.T) { + ctx := context.Background() + + t.Run("error on size exceeding limit", func(t *testing.T) { + sm := &StorageManager{ + store: nil, + maxInlineSize: 10, + } + + input := "This is a very long string that exceeds the limit" + + // Should error when data exceeds limit and storage is disabled + _, err := sm.Dehydrate(ctx, input, "run-1") + if err == nil { + t.Fatal("Dehydrate() should error when size exceeds limit and storage is disabled") + } + if !strings.Contains(err.Error(), "storage is disabled") { + t.Errorf("Expected error about storage disabled, got: %v", err) + } + }) + + t.Run("pass through small data", func(t *testing.T) { + sm := &StorageManager{ + store: nil, + maxInlineSize: 100, + } + + input := "small string" + + // Should pass through when data is under the limit + got, err := sm.Dehydrate(ctx, input, "run-1") + if err != nil { + t.Fatalf("Dehydrate() unexpected error = %v", err) + } + + if !reflect.DeepEqual(got, input) { + t.Errorf("Dehydrate() should pass through small data unchanged") + } + }) +} + +func TestStorageManager_Dehydrate_NestedMap(t *testing.T) { + ctx := context.Background() + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 20, + } + + input := map[string]any{ + "small": "ok", + "large": "This is a very long string that will be offloaded to storage", + } + + got, err := sm.Dehydrate(ctx, input, "run-nested") + if err != nil { + t.Fatalf("Dehydrate() error = %v", err) + } + + resultMap, ok := got.(map[string]any) + if !ok { + t.Fatal("Expected result to be a map") + } + + // Small value should stay inline + if resultMap["small"] != "ok" { + t.Error("Small value should stay inline") + } + + // Large value should be a reference + if largeRef, ok := resultMap["large"].(map[string]any); ok { + if _, hasRef := largeRef["$bubuStorageRef"]; !hasRef { + t.Error("Large value should be offloaded") + } + } else { + t.Error("Large value should be a storage reference") + } +} + +func TestStorageManager_RoundTrip(t *testing.T) { + ctx := context.Background() + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 30, + } + + original := map[string]any{ + "data": "This is a string that is definitely longer than the inline size limit", + } + + // Dehydrate + dehydrated, err := sm.Dehydrate(ctx, original, "roundtrip-test") + if err != nil { + t.Fatalf("Dehydrate() error = %v", err) + } + + // Hydrate + hydrated, err := sm.Hydrate(ctx, dehydrated) + if err != nil { + t.Fatalf("Hydrate() error = %v", err) + } + + // Should match original + if !reflect.DeepEqual(hydrated, original) { + t.Errorf("Round trip failed: got %v, want %v", hydrated, original) + } +} + +func TestStorageManager_GetStore(t *testing.T) { + mock := newMockStore() + sm := &StorageManager{ + store: mock, + } + + if sm.GetStore() != mock { + t.Error("GetStore() should return the underlying store") + } +} + +func TestStoredObject(t *testing.T) { + // Test JSON marshaling/unmarshaling of StoredObject + obj := StoredObject{ + ContentType: "json", + Data: json.RawMessage(`{"key":"value"}`), + } + + marshaled, err := json.Marshal(obj) + if err != nil { + t.Fatalf("Marshal error = %v", err) + } + + var unmarshaled StoredObject + if err := json.Unmarshal(marshaled, &unmarshaled); err != nil { + t.Fatalf("Unmarshal error = %v", err) + } + + if unmarshaled.ContentType != obj.ContentType { + t.Errorf("ContentType = %v, want %v", unmarshaled.ContentType, obj.ContentType) + } + + if !bytes.Equal(unmarshaled.Data, obj.Data) { + t.Errorf("Data = %v, want %v", unmarshaled.Data, obj.Data) + } +} + +// Tests for NewManager +func TestNewManager_FileStore(t *testing.T) { + tmpDir := t.TempDir() + + err := os.Setenv("BUBU_STORAGE_PROVIDER", "file") + if err != nil { + t.Fatalf("Setenv() error = %v", err) + } + + err = os.Setenv("BUBU_STORAGE_PATH", tmpDir) + if err != nil { + t.Fatalf("Setenv() error = %v", err) + } + defer func() { + err = os.Unsetenv("BUBU_STORAGE_PROVIDER") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + err = os.Unsetenv("BUBU_STORAGE_PATH") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + }() + defer func() { + err = os.Unsetenv("BUBU_STORAGE_PATH") + if err != nil { + t.Fatalf("Unsetenv() error = %v", err) + } + }() + + ctx := context.Background() + manager, err := NewManager(ctx) + if err != nil { + t.Fatalf("NewManager() error = %v", err) + } + + if manager == nil { + t.Fatal("NewManager() returned nil manager") + } + + if manager.store == nil { + t.Error("NewManager() did not initialize file store") + } + + if manager.maxInlineSize != DefaultMaxInlineSize { + t.Errorf("NewManager() maxInlineSize = %d, want %d", manager.maxInlineSize, DefaultMaxInlineSize) + } +} + +func TestNewManager_NoProvider(t *testing.T) { + _ = os.Unsetenv("BUBU_STORAGE_PROVIDER") + _ = os.Unsetenv("BUBU_STORAGE_PATH") + + ctx := context.Background() + manager, err := NewManager(ctx) + if err != nil { + t.Fatalf("NewManager() error = %v", err) + } + + if manager.store != nil { + t.Error("NewManager() should have nil store when no provider is set") + } +} + +func TestNewManager_FileProviderMissingPath(t *testing.T) { + ctx := context.Background() + + // Set file provider but don't set path + _ = os.Setenv("BUBU_STORAGE_PROVIDER", "file") + _ = os.Unsetenv("BUBU_STORAGE_PATH") + defer func() { _ = os.Unsetenv("BUBU_STORAGE_PROVIDER") }() + + _, err := NewManager(ctx) + if err == nil { + t.Fatal("NewManager() should error when BUBU_STORAGE_PROVIDER=file but BUBU_STORAGE_PATH is not set") + } + + expectedMsg := "BUBU_STORAGE_PROVIDER is set to 'file' but BUBU_STORAGE_PATH is not set" + if err.Error() != expectedMsg { + t.Errorf("Expected error message %q, got: %v", expectedMsg, err) + } +} + +func TestNewManager_CustomMaxInlineSize(t *testing.T) { + _ = os.Setenv("BUBU_MAX_INLINE_SIZE", "1024") + defer func() { _ = os.Unsetenv("BUBU_MAX_INLINE_SIZE") }() + + ctx := context.Background() + manager, err := NewManager(ctx) + if err != nil { + t.Fatalf("NewManager() error = %v", err) + } + + if manager.maxInlineSize != 1024 { + t.Errorf("NewManager() maxInlineSize = %d, want 1024", manager.maxInlineSize) + } +} + +func TestNewManager_InvalidMaxInlineSize(t *testing.T) { + _ = os.Setenv("BUBU_MAX_INLINE_SIZE", "not-a-number") + defer func() { _ = os.Unsetenv("BUBU_MAX_INLINE_SIZE") }() + + ctx := context.Background() + manager, err := NewManager(ctx) + if err == nil { + t.Fatalf("NewManager() expected error for invalid BUBU_MAX_INLINE_SIZE, got nil") + } + if manager != nil { + t.Errorf("NewManager() expected nil manager on error, got %v", manager) + } + expectedMsg := "invalid BUBU_MAX_INLINE_SIZE 'not-a-number': must be a positive integer" + if err.Error() != expectedMsg { + t.Errorf("NewManager() error = %v, want %v", err.Error(), expectedMsg) + } +} + +func TestNewManager_FileStore_InvalidPath(t *testing.T) { + _ = os.Setenv("BUBU_STORAGE_PROVIDER", "file") + _ = os.Setenv("BUBU_STORAGE_PATH", "/this/path/does/not/exist") + defer func() { _ = os.Unsetenv("BUBU_STORAGE_PROVIDER") }() + defer func() { _ = os.Unsetenv("BUBU_STORAGE_PATH") }() + + ctx := context.Background() + _, err := NewManager(ctx) + if err == nil { + t.Error("NewManager() should return error for invalid path") + } +} + +func TestNewManager_S3Store_NoCredentials(t *testing.T) { + // Unset any AWS variables to ensure a clean slate + _ = os.Unsetenv("AWS_ACCESS_KEY_ID") + _ = os.Unsetenv("AWS_SECRET_ACCESS_KEY") + _ = os.Unsetenv("AWS_SESSION_TOKEN") + _ = os.Unsetenv("AWS_REGION") + + _ = os.Setenv("BUBU_STORAGE_PROVIDER", "s3") + defer func() { _ = os.Unsetenv("BUBU_STORAGE_PROVIDER") }() + + ctx := context.Background() + _, err := NewManager(ctx) + if err == nil { + t.Error("NewManager() should return an error when S3 provider is selected" + + "but no credentials or region are configured") + } +} + +func TestStorageManager_Dehydrate_SliceElements(t *testing.T) { + ctx := context.Background() + mock := newMockStore() + sm := &StorageManager{ + store: mock, + maxInlineSize: 20, + outputPrefix: "outputs", + } + + longString := "this string is definitely longer than twenty characters" + input := []any{ + "short", + longString, + map[string]any{"key": "value"}, // small map + map[string]any{"key": longString}, + } + + dehydrated, err := sm.Dehydrate(ctx, input, "slice-run-id") + if err != nil { + t.Fatalf("Dehydrate() error = %v", err) + } + + dehydratedSlice, ok := dehydrated.([]any) + if !ok { + t.Fatalf("Expected result to be a slice, got %T", dehydrated) + } + + if len(dehydratedSlice) != 4 { + t.Fatalf("Expected slice of length 4, got %d", len(dehydratedSlice)) + } + + // 1. Check the small string + if dehydratedSlice[0] != "short" { + t.Errorf("Expected first element to be 'short', got %v", dehydratedSlice[0]) + } + + // 2. Check the long string + ref1, ok := dehydratedSlice[1].(map[string]any) + if !ok || ref1["$bubuStorageRef"] == nil { + t.Errorf("Expected second element to be a storage ref, got %v", dehydratedSlice[1]) + } + + // 3. Check the small map + if !reflect.DeepEqual(dehydratedSlice[2], map[string]any{"key": "value"}) { + t.Errorf("Expected third element to be the original small map, got %v", dehydratedSlice[2]) + } + + // 4. Check the map containing a large string + mapWithRef, ok := dehydratedSlice[3].(map[string]any) + if !ok { + t.Fatalf("Expected fourth element to be a map, got %T", dehydratedSlice[3]) + } + nestedRef, ok := mapWithRef["key"].(map[string]any) + if !ok || nestedRef["$bubuStorageRef"] == nil { + t.Errorf("Expected nested value in map to be a storage ref, got %v", mapWithRef["key"]) + } + + // We expect two writes: one for the standalone long string and one for the long string inside the map + if mock.writes != 2 { + t.Errorf("Expected 2 writes to the store, got %d", mock.writes) + } +} diff --git a/storage/s3_integration_test.go b/storage/s3_integration_test.go new file mode 100644 index 0000000..93d9976 --- /dev/null +++ b/storage/s3_integration_test.go @@ -0,0 +1,89 @@ +package storage + +import ( + "context" + "os" + "testing" + "time" +) + +// Integration test for S3 multipart uploads using manager.Uploader. +// This test is gated by environment variables and will be skipped by default. +// Required envs to run: +// - BUBU_S3_INTEGRATION=1 +// - BUBU_STORAGE_S3_BUCKET +// - AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY (and optionally AWS_SESSION_TOKEN) +// +// Optional envs: +// - BUBU_STORAGE_S3_REGION (default us-east-1) +// - BUBU_STORAGE_S3_ENDPOINT (for MinIO or S3-compatible endpoints) +func TestS3Integration_UploaderRoundTrip(t *testing.T) { + if os.Getenv("BUBU_S3_INTEGRATION") != "1" { + t.Skip("skipping S3 integration test; set BUBU_S3_INTEGRATION=1 to enable") + } + + bucket := os.Getenv("BUBU_STORAGE_S3_BUCKET") + if bucket == "" { + t.Skip("skipping: BUBU_STORAGE_S3_BUCKET not set") + } + if os.Getenv("AWS_ACCESS_KEY_ID") == "" || os.Getenv("AWS_SECRET_ACCESS_KEY") == "" { + t.Skip("skipping: AWS credentials not provided") + } + + // Force S3 provider for the manager + _ = os.Setenv("BUBU_STORAGE_PROVIDER", "s3") + defer func() { _ = os.Unsetenv("BUBU_STORAGE_PROVIDER") }() + + // Make inline threshold small to guarantee offload + _ = os.Setenv("BUBU_MAX_INLINE_SIZE", "16") + defer func() { _ = os.Unsetenv("BUBU_MAX_INLINE_SIZE") }() + + // Use a unique stepRunID for namespacing + stepRunID := "it-" + time.Now().UTC().Format("20060102T150405Z") + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + sm, err := NewManager(ctx) + if err != nil { + t.Skipf("manager init failed (likely missing/invalid S3 config): %v", err) + } + if sm == nil || sm.GetStore() == nil { + t.Skip("store not initialized; skipping integration test") + } + + // Large payload to trigger multipart path in uploader (size > 5MB preferred) + // Keep it modest to avoid long CI times; uploader will still choose multipart if configured. + // We rely on manager.Dehydrate to offload any value exceeding inline size. + big := make([]byte, 256*1024) // 256 KiB + for i := range big { + big[i] = byte('a' + (i % 26)) + } + + original := map[string]any{"data": string(big)} + + // Dehydrate writes to S3 via manager.Uploader + dehydrated, err := sm.Dehydrate(ctx, original, stepRunID) + if err != nil { + t.Fatalf("Dehydrate error: %v", err) + } + + // Hydrate reads back from S3 and reconstructs the original value + hydrated, err := sm.Hydrate(ctx, dehydrated) + if err != nil { + t.Fatalf("Hydrate error: %v", err) + } + + // Spot-check the presence and length of the large field + hydratedMap, ok := hydrated.(map[string]any) + if !ok { + t.Fatalf("expected map[string]any after hydrate, got %T", hydrated) + } + val, ok := hydratedMap["data"].(string) + if !ok { + t.Fatalf("expected string in hydrated 'data' field, got %T", hydratedMap["data"]) + } + if len(val) != len(big) { + t.Fatalf("hydrated data length mismatch: got %d want %d", len(val), len(big)) + } +} diff --git a/storage/s3_store.go b/storage/s3_store.go new file mode 100644 index 0000000..eb81a00 --- /dev/null +++ b/storage/s3_store.go @@ -0,0 +1,194 @@ +package storage + +import ( + "context" + "fmt" + "io" + "net/http" + "os" + "strconv" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/aws/retry" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/feature/s3/manager" + "github.com/aws/aws-sdk-go-v2/service/s3" + s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" +) + +// S3Store implements the Store interface for an S3-compatible object store. +// It supports standard AWS S3 and S3-compatible services like MinIO. +// Uses manager.Uploader for automatic multipart uploads on large files. +type S3Store struct { + client *s3.Client + uploader *manager.Uploader + bucket string +} + +// getS3RetryConfig returns retry configuration from env or defaults +func getS3RetryConfig() (maxAttempts int, maxBackoff time.Duration) { + maxAttempts = 3 // Default + if v := os.Getenv("BUBU_STORAGE_S3_MAX_RETRIES"); v != "" { + if i, err := strconv.Atoi(v); err == nil && i > 0 { + maxAttempts = i + } + } + + maxBackoff = 10 * time.Second // Default + if v := os.Getenv("BUBU_STORAGE_S3_MAX_BACKOFF"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + maxBackoff = d + } + } + + return maxAttempts, maxBackoff +} + +// getS3UsePathStyle determines whether to use path-style addressing for S3. +// - Path-style: https://s3.amazonaws.com/bucket/key (required for MinIO, Ceph) +// - Virtual-hosted-style: https://bucket.s3.amazonaws.com/key (AWS S3 standard, required for TLS) +// +// Logic: +// 1. If BUBU_STORAGE_S3_USE_PATH_STYLE is set, use that value +// 2. If BUBU_STORAGE_S3_ENDPOINT is set (custom endpoint → MinIO/Ceph), default to true +// 3. Otherwise (AWS S3), default to false (virtual-hosted) +func getS3UsePathStyle() bool { + if v := os.Getenv("BUBU_STORAGE_S3_USE_PATH_STYLE"); v != "" { + return v == "true" + } + + // Auto-detect: custom endpoints (MinIO, Ceph) typically require path-style + endpoint := os.Getenv("BUBU_STORAGE_S3_ENDPOINT") + if endpoint != "" { + return true + } + + // Default to virtual-hosted-style for AWS S3 (TLS compatibility) + return false +} + +// NewS3Store creates a new S3Store. +// It automatically configures the S3 client from environment variables. +func NewS3Store(ctx context.Context) (*S3Store, error) { + bucket := os.Getenv("BUBU_STORAGE_S3_BUCKET") + if bucket == "" { + return nil, fmt.Errorf("s3 storage is enabled but BUBU_STORAGE_S3_BUCKET is not set") + } + + cfg, err := config.LoadDefaultConfig(ctx) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + // Configure retry strategy with exponential backoff + maxAttempts, maxBackoff := getS3RetryConfig() + cfg.Retryer = func() aws.Retryer { + return retry.NewStandard(func(o *retry.StandardOptions) { + o.MaxAttempts = maxAttempts + o.MaxBackoff = maxBackoff + }) + } + + // Create a custom HTTP client with configurable timeout + // Default 5min accommodates large file uploads (e.g., 100MB model weights) + timeout := 5 * time.Minute + if v := os.Getenv("BUBU_STORAGE_S3_HTTP_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + timeout = d + } + } + httpClient := &http.Client{ + Timeout: timeout, + } + + // Prioritize our custom region env var, but default to a safe value for S3-compatibles. + if region := os.Getenv("BUBU_STORAGE_S3_REGION"); region != "" { + cfg.Region = region + } else if cfg.Region == "" { + cfg.Region = "us-east-1" + } + + endpoint := os.Getenv("BUBU_STORAGE_S3_ENDPOINT") + if endpoint != "" { + cfg.BaseEndpoint = aws.String(endpoint) + } + + client := s3.NewFromConfig(cfg, func(o *s3.Options) { + // Use path-style for MinIO/Ceph, virtual-hosted for AWS S3 (TLS) + o.UsePathStyle = getS3UsePathStyle() + o.HTTPClient = httpClient + }) + + // Create uploader with automatic multipart for large files + // Default part size: 5MB, concurrency: 5 + uploader := manager.NewUploader(client, func(u *manager.Uploader) { + // Part size for multipart uploads (default 5MB is good) + if partSize := os.Getenv("BUBU_STORAGE_S3_PART_SIZE"); partSize != "" { + if size, err := strconv.ParseInt(partSize, 10, 64); err == nil && size > 0 { + u.PartSize = size + } + } + // Concurrency for parallel uploads (default 5) + if concurrency := os.Getenv("BUBU_STORAGE_S3_CONCURRENCY"); concurrency != "" { + if n, err := strconv.Atoi(concurrency); err == nil && n > 0 { + u.Concurrency = n + } + } + }) + + return &S3Store{ + client: client, + uploader: uploader, + bucket: bucket, + }, nil +} + +// Write uploads data to S3 from a reader. +// Uses manager.Uploader which automatically handles multipart uploads for large files. +// For files >5MB, it will automatically split into parts and upload in parallel. +func (s *S3Store) Write(ctx context.Context, path string, reader io.Reader) error { + input := &s3.PutObjectInput{ + Bucket: &s.bucket, + Key: &path, + Body: reader, + } + + // Server-side encryption controls (default to AES-256 if unset) + if sse := os.Getenv("BUBU_STORAGE_S3_SSE"); sse == "kms" { + algo := s3types.ServerSideEncryptionAwsKms + input.ServerSideEncryption = algo + if kmsKey := os.Getenv("BUBU_STORAGE_S3_SSE_KMS_KEY_ID"); kmsKey != "" { + input.SSEKMSKeyId = &kmsKey + } + } else { + // Default and explicit "s3" + algo := s3types.ServerSideEncryptionAes256 + input.ServerSideEncryption = algo + } + + // Use uploader which automatically handles: + // - Small files: single PutObject + // - Large files: automatic multipart upload with parallel parts + // - No need to know file size upfront! + _, err := s.uploader.Upload(ctx, input) + if err != nil { + return fmt.Errorf("failed to upload object '%s' to s3 bucket '%s': %w", path, s.bucket, err) + } + return nil +} + +// Read downloads data from S3 and writes it to a writer. +func (s *S3Store) Read(ctx context.Context, path string, writer io.Writer) error { + out, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: &s.bucket, + Key: &path, + }) + if err != nil { + return fmt.Errorf("failed to get object '%s' from s3 bucket '%s': %w", path, s.bucket, err) + } + defer func() { _ = out.Body.Close() }() + + _, err = io.Copy(writer, out.Body) + return err +} diff --git a/storage/s3_store_test.go b/storage/s3_store_test.go new file mode 100644 index 0000000..c6b9d77 --- /dev/null +++ b/storage/s3_store_test.go @@ -0,0 +1,322 @@ +package storage + +import ( + "bytes" + "context" + "io" + "os" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// s3API interface defines the methods we need from the S3 client +type s3API interface { + PutObject( + ctx context.Context, + params *s3.PutObjectInput, + optFns ...func(*s3.Options), + ) (*s3.PutObjectOutput, error) + GetObject( + ctx context.Context, + params *s3.GetObjectInput, + optFns ...func(*s3.Options), + ) (*s3.GetObjectOutput, error) +} + +// mockS3Client is a mock implementation of the S3 API for testing +type mockS3Client struct { + putObjectFunc func( + ctx context.Context, + params *s3.PutObjectInput, + optFns ...func(*s3.Options), + ) (*s3.PutObjectOutput, error) + getObjectFunc func( + ctx context.Context, + params *s3.GetObjectInput, + optFns ...func(*s3.Options), + ) (*s3.GetObjectOutput, error) +} + +func (m *mockS3Client) PutObject( + ctx context.Context, + params *s3.PutObjectInput, + optFns ...func(*s3.Options), +) (*s3.PutObjectOutput, error) { + return m.putObjectFunc(ctx, params, optFns...) +} + +func (m *mockS3Client) GetObject( + ctx context.Context, + params *s3.GetObjectInput, + optFns ...func(*s3.Options), +) (*s3.GetObjectOutput, error) { + return m.getObjectFunc(ctx, params, optFns...) +} + +// mockS3Store wraps the mock client for testing +type mockS3Store struct { + client s3API + bucket string +} + +func (s *mockS3Store) Write(ctx context.Context, path string, reader io.Reader) error { + _, err := s.client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: &s.bucket, + Key: &path, + Body: reader, + }) + return err +} + +func (s *mockS3Store) Read(ctx context.Context, path string, writer io.Writer) error { + out, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: &s.bucket, + Key: &path, + }) + if err != nil { + return err + } + defer func() { _ = out.Body.Close() }() + _, err = io.Copy(writer, out.Body) + return err +} + +func TestNewS3Store_MissingBucket(t *testing.T) { + _ = os.Unsetenv("BUBU_STORAGE_S3_BUCKET") + + ctx := context.Background() + _, err := NewS3Store(ctx) + if err == nil { + t.Error("NewS3Store() should return error when bucket is not set") + } + if err != nil && err.Error() != "s3 storage is enabled but BUBU_STORAGE_S3_BUCKET is not set" { + t.Errorf("NewS3Store() unexpected error: %v", err) + } +} + +func TestS3Store_Write(t *testing.T) { + testData := []byte("test s3 data") + testBucket := "test-bucket" + testKey := "test/path.txt" + + mock := &mockS3Client{ + putObjectFunc: func( + ctx context.Context, + params *s3.PutObjectInput, + optFns ...func(*s3.Options), + ) (*s3.PutObjectOutput, error) { + // Verify parameters + if *params.Bucket != testBucket { + t.Errorf("PutObject bucket = %v, want %v", *params.Bucket, testBucket) + } + if *params.Key != testKey { + t.Errorf("PutObject key = %v, want %v", *params.Key, testKey) + } + + // Read and verify body + data, err := io.ReadAll(params.Body) + if err != nil { + t.Fatalf("Failed to read body: %v", err) + } + if !bytes.Equal(data, testData) { + t.Errorf("PutObject body = %v, want %v", data, testData) + } + + return &s3.PutObjectOutput{}, nil + }, + } + + store := &mockS3Store{ + client: mock, + bucket: testBucket, + } + + ctx := context.Background() + err := store.Write(ctx, testKey, bytes.NewReader(testData)) + if err != nil { + t.Errorf("Write() error = %v", err) + } +} + +func TestS3Store_Read(t *testing.T) { + testData := []byte("s3 read data") + testBucket := "test-bucket" + testKey := "read/path.txt" + + mock := &mockS3Client{ + getObjectFunc: func( + ctx context.Context, + params *s3.GetObjectInput, + optFns ...func(*s3.Options), + ) (*s3.GetObjectOutput, error) { + // Verify parameters + if *params.Bucket != testBucket { + t.Errorf("GetObject bucket = %v, want %v", *params.Bucket, testBucket) + } + if *params.Key != testKey { + t.Errorf("GetObject key = %v, want %v", *params.Key, testKey) + } + + return &s3.GetObjectOutput{ + Body: io.NopCloser(bytes.NewReader(testData)), + }, nil + }, + } + + store := &mockS3Store{ + client: mock, + bucket: testBucket, + } + + ctx := context.Background() + var buf bytes.Buffer + err := store.Read(ctx, testKey, &buf) + if err != nil { + t.Errorf("Read() error = %v", err) + } + + if !bytes.Equal(buf.Bytes(), testData) { + t.Errorf("Read() got %v, want %v", buf.Bytes(), testData) + } +} + +func TestS3Store_Write_Error(t *testing.T) { + expectedErr := io.EOF + + mock := &mockS3Client{ + putObjectFunc: func( + ctx context.Context, + params *s3.PutObjectInput, + optFns ...func(*s3.Options), + ) (*s3.PutObjectOutput, error) { + return nil, expectedErr + }, + } + + store := &mockS3Store{ + client: mock, + bucket: "test-bucket", + } + + ctx := context.Background() + err := store.Write(ctx, "error/path", bytes.NewReader([]byte("data"))) + if err == nil { + t.Error("Write() should return error when PutObject fails") + } +} + +func TestS3Store_Read_Error(t *testing.T) { + expectedErr := io.EOF + + mock := &mockS3Client{ + getObjectFunc: func( + ctx context.Context, + params *s3.GetObjectInput, + optFns ...func(*s3.Options), + ) (*s3.GetObjectOutput, error) { + return nil, expectedErr + }, + } + + store := &mockS3Store{ + client: mock, + bucket: "test-bucket", + } + + ctx := context.Background() + var buf bytes.Buffer + err := store.Read(ctx, "error/path", &buf) + if err == nil { + t.Error("Read() should return error when GetObject fails") + } +} + +func TestS3Store_RoundTrip(t *testing.T) { + testData := []byte("round trip data") + storage := make(map[string][]byte) + + mock := &mockS3Client{ + putObjectFunc: func( + ctx context.Context, + params *s3.PutObjectInput, + optFns ...func(*s3.Options), + ) (*s3.PutObjectOutput, error) { + data, _ := io.ReadAll(params.Body) + storage[*params.Key] = data + return &s3.PutObjectOutput{}, nil + }, + getObjectFunc: func( + ctx context.Context, + params *s3.GetObjectInput, + optFns ...func(*s3.Options), + ) (*s3.GetObjectOutput, error) { + data := storage[*params.Key] + return &s3.GetObjectOutput{ + Body: io.NopCloser(bytes.NewReader(data)), + }, nil + }, + } + + store := &mockS3Store{ + client: mock, + bucket: "test-bucket", + } + + ctx := context.Background() + path := "roundtrip/test.txt" + + // Write + if err := store.Write(ctx, path, bytes.NewReader(testData)); err != nil { + t.Fatalf("Write() error = %v", err) + } + + // Read + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Fatalf("Read() error = %v", err) + } + + // Verify + if !bytes.Equal(buf.Bytes(), testData) { + t.Errorf("Round trip failed: got %v, want %v", buf.Bytes(), testData) + } +} + +func TestNewManager_S3Store(t *testing.T) { + // This test verifies the environment variable handling for S3 + // We can't actually create an S3 client without AWS credentials + + _ = os.Setenv("BUBU_STORAGE_PROVIDER", "s3") + _ = os.Setenv("BUBU_STORAGE_S3_BUCKET", "test-bucket") + _ = os.Setenv("AWS_ACCESS_KEY_ID", "test") + _ = os.Setenv("AWS_SECRET_ACCESS_KEY", "test") + _ = os.Setenv("AWS_REGION", "us-east-1") + + defer func() { + _ = os.Unsetenv("BUBU_STORAGE_PROVIDER") + _ = os.Unsetenv("BUBU_STORAGE_S3_BUCKET") + _ = os.Unsetenv("AWS_ACCESS_KEY_ID") + _ = os.Unsetenv("AWS_SECRET_ACCESS_KEY") + _ = os.Unsetenv("AWS_REGION") + }() + + ctx := context.Background() + + // This will attempt to create a real S3 client but won't connect to AWS + // The error (if any) should be about connectivity, not configuration + manager, err := NewManager(ctx) + + // We expect this to succeed in creating the manager + if err != nil { + t.Logf("NewManager() with S3 error (expected in test env): %v", err) + } + + if manager == nil { + t.Fatal("NewManager() returned nil manager") + } + + // In a real AWS environment, the store would be initialized + // In test environment, it might fail but that's okay + t.Logf("S3 store initialization completed (store nil: %v)", manager.store == nil) +} diff --git a/storage/store.go b/storage/store.go new file mode 100644 index 0000000..8b86c92 --- /dev/null +++ b/storage/store.go @@ -0,0 +1,16 @@ +package storage + +import ( + "context" + "io" +) + +// Store is the interface for a generic storage backend. +// It provides a streaming Read/Write interface. +type Store interface { + // Write saves the data from the reader to the storage backend at the specified path. + Write(ctx context.Context, path string, reader io.Reader) error + + // Read retrieves the data from the storage backend at the specified path and writes it to the writer. + Read(ctx context.Context, path string, writer io.Writer) error +} diff --git a/storage/store_mock.go b/storage/store_mock.go new file mode 100644 index 0000000..af00d51 --- /dev/null +++ b/storage/store_mock.go @@ -0,0 +1,61 @@ +package storage + +import ( + "context" + "fmt" + "io" + + "github.com/stretchr/testify/mock" +) + +// mockStore implements the Store interface for testing +type mockStore struct { + data map[string][]byte + writes int + reads int +} + +// MockManager is a mock implementation of the StorageManager interface. +type MockManager struct { + mock.Mock +} + +// Hydrate mocks the Hydrate method for testing. +func (m *MockManager) Hydrate(ctx context.Context, data any) (any, error) { + args := m.Called(ctx, data) + return args.Get(0), args.Error(1) +} + +// Dehydrate mocks the Dehydrate method for testing. +func (m *MockManager) Dehydrate(ctx context.Context, data any, stepRunID string) (any, error) { + args := m.Called(ctx, data, stepRunID) + return args.Get(0), args.Error(1) +} + +func newMockStore() *mockStore { + return &mockStore{ + data: make(map[string][]byte), + } +} + +// Write mocks the Write method for testing. +func (m *mockStore) Write(ctx context.Context, path string, reader io.Reader) error { + m.writes++ + data, err := io.ReadAll(reader) + if err != nil { + return err + } + m.data[path] = data + return nil +} + +// Read mocks the Read method for testing. +func (m *mockStore) Read(ctx context.Context, path string, writer io.Writer) error { + m.reads++ + data, exists := m.data[path] + if !exists { + return fmt.Errorf("path not found: %s", path) + } + _, err := writer.Write(data) + return err +} diff --git a/storage/store_test.go b/storage/store_test.go new file mode 100644 index 0000000..c2006a0 --- /dev/null +++ b/storage/store_test.go @@ -0,0 +1,168 @@ +package storage + +import ( + "bytes" + "context" + "testing" +) + +// TestStoreInterface verifies that our concrete implementations satisfy the Store interface +func TestStoreInterface(t *testing.T) { + t.Run("mockStore satisfies Store", func(t *testing.T) { + var _ Store = (*mockStore)(nil) + }) + + t.Run("FileStore satisfies Store", func(t *testing.T) { + var _ Store = (*FileStore)(nil) + }) + + t.Run("S3Store satisfies Store", func(t *testing.T) { + var _ Store = (*S3Store)(nil) + }) +} + +// TestStoreContract verifies the expected behavior of any Store implementation +func TestStoreContract(t *testing.T) { + // Test with FileStore (we can't easily test S3Store without AWS credentials) + tmpDir := t.TempDir() + store, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("Failed to create FileStore: %v", err) + } + + ctx := context.Background() + path := "contract-test/file.txt" + data := []byte("contract test data") + + t.Run("Write then Read", func(t *testing.T) { + // Write + if err := store.Write(ctx, path, bytes.NewReader(data)); err != nil { + t.Fatalf("Write() error = %v", err) + } + + // Read + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Fatalf("Read() error = %v", err) + } + + // Verify + if !bytes.Equal(buf.Bytes(), data) { + t.Errorf("Read() returned %v, want %v", buf.Bytes(), data) + } + }) + + t.Run("Read non-existent returns error", func(t *testing.T) { + var buf bytes.Buffer + err := store.Read(ctx, "does-not-exist", &buf) + if err == nil { + t.Error("Read() should return error for non-existent path") + } + }) + + t.Run("Overwrite existing file", func(t *testing.T) { + newData := []byte("updated data") + + // Overwrite + if err := store.Write(ctx, path, bytes.NewReader(newData)); err != nil { + t.Fatalf("Write() error = %v", err) + } + + // Read + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Fatalf("Read() error = %v", err) + } + + // Should have new data + if !bytes.Equal(buf.Bytes(), newData) { + t.Errorf("After overwrite, Read() = %v, want %v", buf.Bytes(), newData) + } + }) +} + +// TestStoreEdgeCases tests edge cases that all stores should handle +func TestStoreEdgeCases(t *testing.T) { + tmpDir := t.TempDir() + store, err := NewFileStore(tmpDir) + if err != nil { + t.Fatalf("Failed to create FileStore: %v", err) + } + + ctx := context.Background() + + t.Run("Empty data", func(t *testing.T) { + path := "edge/empty.txt" + emptyData := []byte{} + + if err := store.Write(ctx, path, bytes.NewReader(emptyData)); err != nil { + t.Errorf("Write() with empty data error = %v", err) + } + + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Errorf("Read() empty file error = %v", err) + } + + if len(buf.Bytes()) != 0 { + t.Errorf("Expected empty file, got %d bytes", len(buf.Bytes())) + } + }) + + t.Run("Large data", func(t *testing.T) { + path := "edge/large.dat" + largeData := bytes.Repeat([]byte("a"), 1024*1024) // 1MB + + if err := store.Write(ctx, path, bytes.NewReader(largeData)); err != nil { + t.Errorf("Write() large data error = %v", err) + } + + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Errorf("Read() large file error = %v", err) + } + + if !bytes.Equal(buf.Bytes(), largeData) { + t.Errorf("Large file data mismatch") + } + }) + + t.Run("Binary data", func(t *testing.T) { + path := "edge/binary.dat" + binaryData := make([]byte, 256) + for i := range binaryData { + binaryData[i] = byte(i) + } + + if err := store.Write(ctx, path, bytes.NewReader(binaryData)); err != nil { + t.Errorf("Write() binary data error = %v", err) + } + + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Errorf("Read() binary file error = %v", err) + } + + if !bytes.Equal(buf.Bytes(), binaryData) { + t.Errorf("Binary data mismatch") + } + }) + + t.Run("Path with special characters", func(t *testing.T) { + path := "edge/file-with-dashes_and_underscores.json" + data := []byte("special path") + + if err := store.Write(ctx, path, bytes.NewReader(data)); err != nil { + t.Errorf("Write() with special characters error = %v", err) + } + + var buf bytes.Buffer + if err := store.Read(ctx, path, &buf); err != nil { + t.Errorf("Read() with special characters error = %v", err) + } + + if !bytes.Equal(buf.Bytes(), data) { + t.Errorf("Special path data mismatch") + } + }) +} diff --git a/story_dispatcher.go b/story_dispatcher.go deleted file mode 100644 index 175106d..0000000 --- a/story_dispatcher.go +++ /dev/null @@ -1,357 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "context" - "errors" - "fmt" - "log" - "maps" - "os" - "strings" - "sync" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bubu-sdk-go/k8s" - sdkerrors "github.com/bubustack/bubu-sdk-go/pkg/errors" - "github.com/bubustack/core/contracts" - identity "github.com/bubustack/core/runtime/identity" -) - -type storyRuntime struct { - start func( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - ) (*runsv1alpha1.StoryRun, error) - stop func(ctx context.Context, storyRunName, storyNamespace string) error -} - -// StoryDispatcherOption configures a StoryDispatcher. -type StoryDispatcherOption func(*StoryDispatcher) - -// WithStoryRuntime overrides the start/stop implementation used by the dispatcher. -// Intended primarily for tests. -func WithStoryRuntime( - start func( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - ) (*runsv1alpha1.StoryRun, error), - stop func(ctx context.Context, storyRunName, storyNamespace string) error, -) StoryDispatcherOption { - return func(d *StoryDispatcher) { - d.runtime = storyRuntime{start: start, stop: stop} - } -} - -// StorySession holds metadata about an active StoryRun started by an impulse. -type StorySession struct { - // Key is the dispatcher-local session key used to look up or stop the StoryRun later. - Key string - // StoryRun is the created StoryRun resource name. - StoryRun string - // Namespace is the namespace that owns StoryRun. - Namespace string - // StoryName is the logical Story that produced StoryRun. - StoryName string - // StartedAt records when the dispatcher observed the StoryRun as started. - StartedAt time.Time - // Metadata carries optional impulse-owned attributes associated with the session. - Metadata map[string]string -} - -func copyStorySession(s *StorySession) *StorySession { - if s == nil { - return nil - } - cp := *s - if len(s.Metadata) > 0 { - cp.Metadata = make(map[string]string, len(s.Metadata)) - maps.Copy(cp.Metadata, s.Metadata) - } - return &cp -} - -// StoryTriggerRequest defines the inputs required to trigger a story. -type StoryTriggerRequest struct { - // Key optionally reserves a dispatcher session slot for later Stop/Forget calls. - Key string - // TriggerToken enables idempotent StoryRun creation when the caller provides one. - TriggerToken string - // StoryName overrides the target story name; when empty the dispatcher resolves it from the Impulse environment. - StoryName string - //nolint:lll,lll - // StoryNamespace overrides the target story namespace; when empty the dispatcher resolves it from the Impulse environment. - StoryNamespace string - // Inputs contains the structured trigger payload forwarded to the new StoryRun. - Inputs map[string]any - // Metadata carries caller-defined session metadata stored only in the local dispatcher session. - Metadata map[string]string -} - -// StoryTriggerResult returns the StoryRun created by the dispatcher and the associated session. -type StoryTriggerResult struct { - // StoryRun is the created Kubernetes StoryRun object returned by the runtime. - StoryRun *runsv1alpha1.StoryRun - // Session is the dispatcher-tracked session metadata when a session key was requested. - Session *StorySession -} - -// StoryDispatcher manages StoryRun lifecycles on behalf of an impulse, providing -// session tracking and idempotent stop semantics. -type StoryDispatcher struct { - mu sync.RWMutex - sessions map[string]*StorySession - runtime storyRuntime - timeSource func() time.Time - statsClient *k8s.Client - impulseName string - impulseNamespace string -} - -// NewStoryDispatcher creates a StoryDispatcher with optional configuration. -func NewStoryDispatcher(opts ...StoryDispatcherOption) *StoryDispatcher { - d := &StoryDispatcher{ - sessions: make(map[string]*StorySession), - runtime: storyRuntime{ - start: StartStoryInNamespace, - stop: StopStoryInNamespace, - }, - timeSource: time.Now, - } - for _, opt := range opts { - opt(d) - } - if d.runtime.start == nil { - d.runtime.start = StartStoryInNamespace - } - if d.runtime.stop == nil { - d.runtime.stop = StopStoryInNamespace - } - if d.timeSource == nil { - d.timeSource = time.Now - } - d.initImpulseMetricsClient() - return d -} - -// Trigger starts a StoryRun and optionally records a session keyed by req.Key. -// -// If req.StoryName is empty, the target story is resolved from the Impulse's spec.storyRef -// via GetTargetStory(). This allows impulses to omit the story name in their trigger -// requests, relying on the operator-injected environment variables instead. -func (d *StoryDispatcher) Trigger(ctx context.Context, req StoryTriggerRequest) (*StoryTriggerResult, error) { - if token := strings.TrimSpace(req.TriggerToken); token != "" { - ctx = WithTriggerToken(ctx, token) - } - storyName := req.StoryName - storyNamespace := req.StoryNamespace - - // If no story name is provided, resolve from environment (Impulse.spec.storyRef) - if storyName == "" { - target, err := GetTargetStory() - if err != nil { - return nil, fmt.Errorf("story name is required: %w", err) - } - storyName = target.Name - // Only use target namespace if not explicitly provided - if storyNamespace == "" { - storyNamespace = target.Namespace - } - } - req.StoryName = storyName - req.StoryNamespace = storyNamespace - - if req.Inputs == nil { - req.Inputs = make(map[string]any) - } - sessionKey := strings.TrimSpace(req.Key) - var reserved bool - if sessionKey != "" { - d.mu.Lock() - if _, exists := d.sessions[sessionKey]; exists { - d.mu.Unlock() - return nil, ErrImpulseSessionExists - } - d.sessions[sessionKey] = &StorySession{Key: sessionKey} - d.mu.Unlock() - reserved = true - } - - storyRun, err := d.runtime.start(ctx, req.StoryName, req.StoryNamespace, req.Inputs) - d.recordTriggerStats(ctx, err) - if err != nil { - if reserved { - d.Forget(sessionKey) - } - return nil, err - } - - result := &StoryTriggerResult{StoryRun: storyRun} - if sessionKey == "" { - return result, nil - } - - session := &StorySession{ - Key: sessionKey, - StoryRun: storyRun.Name, - Namespace: storyRun.Namespace, - StoryName: req.StoryName, - StartedAt: d.timeSource().UTC(), - } - metadata := identity.StoryRunSelectorLabels(storyRun.Name) - if len(req.Metadata) > 0 { - if metadata == nil { - metadata = make(map[string]string, len(req.Metadata)) - } - maps.Copy(metadata, req.Metadata) - } - if len(metadata) > 0 { - session.Metadata = metadata - } - - d.mu.Lock() - d.sessions[sessionKey] = session - d.mu.Unlock() - result.Session = copyStorySession(session) - return result, nil -} - -// Stop cancels the StoryRun associated with the session key. -// Returns the session metadata when successful. -func (d *StoryDispatcher) Stop(ctx context.Context, key string) (*StorySession, error) { - key = strings.TrimSpace(key) - if key == "" { - return nil, fmt.Errorf("session key is required") - } - - session, ok := d.removeSession(key) - if !ok { - return nil, ErrImpulseSessionNotFound - } - - err := d.runtime.stop(ctx, session.StoryRun, session.Namespace) - switch { - case err == nil: - return session, nil - case errors.Is(err, ErrStoryRunNotFound): - return session, ErrStoryRunNotFound - default: - d.mu.Lock() - d.sessions[key] = session - d.mu.Unlock() - return session, err - } -} - -// HasSession reports whether a session is currently tracked for the key. -func (d *StoryDispatcher) HasSession(key string) bool { - key = strings.TrimSpace(key) - if key == "" { - return false - } - d.mu.RLock() - defer d.mu.RUnlock() - _, ok := d.sessions[key] - return ok -} - -// Session returns the session metadata for a key without mutating state. -func (d *StoryDispatcher) Session(key string) (*StorySession, bool) { - key = strings.TrimSpace(key) - if key == "" { - return nil, false - } - d.mu.RLock() - defer d.mu.RUnlock() - session, ok := d.sessions[key] - if !ok { - return nil, false - } - cpy := *session - if len(session.Metadata) > 0 { - cpy.Metadata = maps.Clone(session.Metadata) - } - return &cpy, true -} - -func (d *StoryDispatcher) initImpulseMetricsClient() { - name := strings.TrimSpace(os.Getenv(contracts.ImpulseNameEnv)) - if name == "" { - return - } - namespace := strings.TrimSpace(os.Getenv(contracts.ImpulseNamespaceEnv)) - client, err := k8s.SharedClient() - if err != nil { - log.Printf("bubu sdk: unable to initialize impulse metrics client: %v", err) - return - } - d.statsClient = client - d.impulseName = name - d.impulseNamespace = namespace -} - -func (d *StoryDispatcher) recordTriggerStats(ctx context.Context, triggerErr error) { - if d.statsClient == nil || d.impulseName == "" { - return - } - delta := k8s.TriggerStatsDelta{ - TriggersReceived: 1, - LastTrigger: d.timeSource().UTC(), - } - switch { - case triggerErr == nil: - delta.StoriesLaunched = 1 - successTime := delta.LastTrigger - delta.LastSuccess = &successTime - case errors.Is(triggerErr, sdkerrors.ErrRetryable), - errors.Is(triggerErr, context.Canceled), - errors.Is(triggerErr, context.DeadlineExceeded): - // do not classify retryable/pending as durable failures - default: - delta.FailedTriggers = 1 - } - if err := d.statsClient.UpdateImpulseTriggerStats(ctx, d.impulseName, d.impulseNamespace, delta); err != nil { - log.Printf("bubu sdk: failed to update impulse trigger stats: %v", err) - } -} - -// Forget removes a session without attempting to stop the StoryRun. -func (d *StoryDispatcher) Forget(key string) { - key = strings.TrimSpace(key) - if key == "" { - return - } - d.mu.Lock() - defer d.mu.Unlock() - delete(d.sessions, key) -} - -func (d *StoryDispatcher) removeSession(key string) (*StorySession, bool) { - d.mu.Lock() - defer d.mu.Unlock() - session, ok := d.sessions[key] - if ok { - delete(d.sessions, key) - } - return session, ok -} diff --git a/story_dispatcher_test.go b/story_dispatcher_test.go deleted file mode 100644 index 5da7267..0000000 --- a/story_dispatcher_test.go +++ /dev/null @@ -1,210 +0,0 @@ -package sdk_test - -import ( - "context" - "errors" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - sdk "github.com/bubustack/bubu-sdk-go" -) - -type stubStoryRuntime struct { - startFn func( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - ) (*runsv1alpha1.StoryRun, error) - stopFn func(ctx context.Context, storyRunName, storyNamespace string) error -} - -func (s stubStoryRuntime) start( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, -) (*runsv1alpha1.StoryRun, error) { - if s.startFn != nil { - return s.startFn(ctx, storyName, storyNamespace, inputs) - } - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "example-run", - Namespace: "default", - }, - }, nil -} - -func (s stubStoryRuntime) stop(ctx context.Context, storyRunName, storyNamespace string) error { - if s.stopFn != nil { - return s.stopFn(ctx, storyRunName, storyNamespace) - } - return nil -} - -func TestStoryDispatcher_TriggerAndStop(t *testing.T) { - stub := stubStoryRuntime{} - dispatcher := sdk.NewStoryDispatcher( - sdk.WithStoryRuntime(stub.start, stub.stop), - ) - - ctx := context.Background() - res, err := dispatcher.Trigger(ctx, sdk.StoryTriggerRequest{ - Key: "room-1", - StoryName: "demo-story", - }) - if err != nil { - t.Fatalf("Trigger() error = %v", err) - } - if res.StoryRun == nil { - t.Fatal("Trigger() returned nil StoryRun") - } - if !dispatcher.HasSession("room-1") { - t.Fatal("expected session to be tracked after Trigger") - } - - session, err := dispatcher.Stop(ctx, "room-1") - if err != nil { - t.Fatalf("Stop() error = %v", err) - } - if session == nil || session.Key != "room-1" { - t.Fatalf("Stop() returned unexpected session: %#v", session) - } - if dispatcher.HasSession("room-1") { - t.Fatal("expected session to be cleared after Stop") - } -} - -func TestStoryDispatcher_DuplicateKey(t *testing.T) { - dispatcher := sdk.NewStoryDispatcher( - sdk.WithStoryRuntime( - func( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - ) (*runsv1alpha1.StoryRun, error) { - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{Name: "run-1", Namespace: "default"}, - }, nil - }, - func(ctx context.Context, storyRunName, storyNamespace string) error { return nil }, - ), - ) - - ctx := context.Background() - if _, err := dispatcher.Trigger(ctx, sdk.StoryTriggerRequest{Key: "dup", StoryName: "demo"}); err != nil { - t.Fatalf("initial Trigger() failed: %v", err) - } - if _, err := dispatcher.Trigger( - ctx, - sdk.StoryTriggerRequest{Key: "dup", StoryName: "demo"}, - ); !errors.Is(err, sdk.ErrImpulseSessionExists) { - t.Fatalf("expected ErrImpulseSessionExists, got %v", err) - } -} - -func TestStoryDispatcher_TriggerToken(t *testing.T) { - token := "token-123" - dispatcher := sdk.NewStoryDispatcher( - sdk.WithStoryRuntime( - func(ctx context.Context, storyName, storyNamespace string, inputs map[string]any) (*runsv1alpha1.StoryRun, error) { - if got := sdk.TriggerTokenFromContext(ctx); got != token { - t.Fatalf("TriggerTokenFromContext() = %q, want %q", got, token) - } - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{Name: "run-1", Namespace: "default"}, - }, nil - }, - func(ctx context.Context, storyRunName, storyNamespace string) error { return nil }, - ), - ) - - ctx := context.Background() - _, err := dispatcher.Trigger(ctx, sdk.StoryTriggerRequest{ - StoryName: "demo", - TriggerToken: token, - }) - if err != nil { - t.Fatalf("Trigger() failed: %v", err) - } -} - -func TestStoryDispatcher_StopNotFound(t *testing.T) { - stopped := false - dispatcher := sdk.NewStoryDispatcher( - sdk.WithStoryRuntime( - func( - ctx context.Context, - storyName string, - storyNamespace string, - inputs map[string]any, - ) (*runsv1alpha1.StoryRun, error) { - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{Name: "run-1", Namespace: "default"}, - }, nil - }, - func(ctx context.Context, storyRunName, storyNamespace string) error { - stopped = true - return sdk.ErrStoryRunNotFound - }, - ), - ) - - ctx := context.Background() - if _, err := dispatcher.Trigger(ctx, sdk.StoryTriggerRequest{Key: "room-1", StoryName: "demo"}); err != nil { - t.Fatalf("Trigger() failed: %v", err) - } - - session, err := dispatcher.Stop(ctx, "room-1") - if !errors.Is(err, sdk.ErrStoryRunNotFound) { - t.Fatalf("expected ErrStoryRunNotFound, got %v", err) - } - if session == nil || session.StoryRun != "run-1" { - t.Fatalf("unexpected session returned from Stop: %#v", session) - } - if !stopped { - t.Fatal("stop runtime was not invoked") - } - if dispatcher.HasSession("room-1") { - t.Fatal("session should not be reinserted when storyrun is already gone") - } -} - -func TestStoryDispatcher_SessionCloning(t *testing.T) { - dispatcher := sdk.NewStoryDispatcher( - sdk.WithStoryRuntime( - func(ctx context.Context, storyName, storyNamespace string, inputs map[string]any) (*runsv1alpha1.StoryRun, error) { - return &runsv1alpha1.StoryRun{ - ObjectMeta: metav1.ObjectMeta{Name: "run-1", Namespace: "default"}, - }, nil - }, - func(ctx context.Context, storyRunName, storyNamespace string) error { return nil }, - ), - ) - _, err := dispatcher.Trigger( - context.Background(), - sdk.StoryTriggerRequest{ - Key: "a", - StoryName: "demo", - Metadata: map[string]string{"policy": "welcome"}, - }, - ) - if err != nil { - t.Fatalf("Trigger() failed: %v", err) - } - - got, ok := dispatcher.Session("a") - if !ok || got == nil { - t.Fatalf("Session() failed, ok=%v value=%v", ok, got) - } - got.Metadata["policy"] = "mutated" - - next, ok := dispatcher.Session("a") - if !ok || next.Metadata["policy"] != "welcome" { - t.Fatalf("Session metadata should be cloned, got %#v", next.Metadata) - } -} diff --git a/stream.go b/stream.go index ebfa997..7117c5b 100644 --- a/stream.go +++ b/stream.go @@ -1,2058 +1,1304 @@ package sdk import ( - "bytes" "context" - "errors" + "crypto/tls" + "crypto/x509" "fmt" "io" - "log/slog" - randv2 "math/rand/v2" - "mime" + "math/rand" + "net" + "os" "strconv" "strings" "sync" "sync/atomic" "time" - "github.com/bubustack/bobrapet/pkg/storage" + bobravozgrpcproto "github.com/bubustack/bobravoz-grpc/proto/v1" "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/bubustack/core/contracts" - coretransport "github.com/bubustack/core/runtime/transport" - transportconnector "github.com/bubustack/core/runtime/transport/connector" - "github.com/bubustack/tractatus/envelope" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" + "github.com/bubustack/bubu-sdk-go/pkg/metrics" "golang.org/x/sync/errgroup" + "google.golang.org/grpc" "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" - "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/encoding/protojson" + pbproto "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/structpb" + + "github.com/bubustack/bubu-sdk-go/k8s" + "github.com/bubustack/bubu-sdk-go/runtime" +) + +var ( + // rng is a shared random source for backoff jitter, protected by a mutex + rng = rand.New(rand.NewSource(time.Now().UnixNano())) + rngMu sync.Mutex ) const ( - // DefaultChannelBufferSize is the in-memory buffer used for Engram stream channels. + // DefaultChannelBufferSize is the buffer size for gRPC streaming channels. + // + // A buffer of 16 provides reasonable throughput while limiting memory usage. + // Override via BUBU_GRPC_CHANNEL_BUFFER_SIZE for workloads with different + // latency/throughput profiles. DefaultChannelBufferSize = 16 - // DefaultMaxMessageSize caps gRPC message sizes when talking to connectors. - DefaultMaxMessageSize = 10 * 1024 * 1024 - defaultMessageTimeout = 30 * time.Second - defaultTimedSendCleanupWait = 2 * time.Second - defaultPacketDedupeEntries = 4096 - octetStreamCodec = "application/octet-stream" - reconnectJitterMinMultiplier = 0.8 - reconnectJitterSpanMultiplier = 0.4 - downstreamDeliveryReceiptType = "downstream.delivered" - deliveryReceiptStreamIDKey = "stream_id" - deliveryReceiptSequenceKey = "sequence" - deliveryReceiptPartitionKey = "partition" - deliveryReceiptSizeBytesKey = "size_bytes" -) -var errPacketDeduperPendingOverflow = errors.New("packet dedupe pending capacity exceeded") -var errTimedSendCleanupTimeout = errors.New("timed send worker cleanup exceeded timeout") -var errControlStartupHandshakeTimeout = errors.New("control startup handshake timed out") + // DefaultGRPCPort is the default port for gRPC servers in streaming mode. + // + // Override via BUBU_GRPC_PORT. The operator typically sets this to 50051. + DefaultGRPCPort = "50051" + + // DefaultMessageTimeout is the default timeout for individual message operations. + // + // Prevents indefinite hangs on network stalls. Override via BUBU_GRPC_MESSAGE_TIMEOUT. + DefaultMessageTimeout = 30 * time.Second + + // DefaultMaxMessageSize is the default max message size for gRPC (10 MiB). + // + // Override via BUBU_GRPC_MAX_RECV_BYTES and BUBU_GRPC_MAX_SEND_BYTES. + // Larger messages should use storage offloading instead of increasing this limit. + DefaultMaxMessageSize = 10 * 1024 * 1024 + + // Client buffer defaults (bounded) + DefaultClientBufferMaxMessages = 100 + DefaultClientBufferMaxBytes = 10 * 1024 * 1024 // 10 MiB +) -type reconnectPolicy struct { - base time.Duration - max time.Duration - maxRetries int +func getEnvInt(name string, def int) int { + if v := os.Getenv(name); v != "" { + if i, err := strconv.Atoi(v); err == nil && i > 0 { + return i + } + } + return def } -type streamRuntimeOptions struct { - messageTimeout time.Duration - publishHeartbeatInterval time.Duration - channelSendTimeout time.Duration - controlHeartbeatInterval time.Duration - hangWatcher *hangWatcher - packetDeduper *packetDeduper - controlRequests chan *transportpb.ControlRequest - sendTracker *timedSendTracker - startupHandshake *controlStartupHandshake +// nolint:unparam // def is intentionally configurable for future call sites +func getEnvBytes(name string, def int) int { + // Accept plain integers as bytes + return getEnvInt(name, def) } -type transportSessionRunner[C any] func( - context.Context, string, bindingReference, - engram.StreamingEngram[C], envResolver, -) error -type reconnectSleepFunc func(context.Context, time.Duration) error -type reconnectDelayFunc func(time.Duration, time.Duration) time.Duration - -type packetDeduperContextKey struct{} -type controlRequestQueueContextKey struct{} +func getMessageTimeout() time.Duration { + if v := os.Getenv("BUBU_GRPC_MESSAGE_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + return DefaultMessageTimeout +} -var reconnectJitterFloat64 = func() float64 { - return randv2.Float64() +func getBackpressureTimeout() time.Duration { + if v := os.Getenv("BUBU_GRPC_CHANNEL_SEND_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } + } + // Fallback to message timeout if not specified + return getMessageTimeout() } -func resolveReconnectPolicy(env envResolver) reconnectPolicy { - base := defaultReconnectBaseBackoff - if d := parsePositiveDuration(env.lookup(contracts.GRPCReconnectBaseBackoffEnv)); d > 0 { - base = d +// getHeartbeatInterval returns the interval for sending heartbeats +func getHeartbeatInterval() time.Duration { + if v := os.Getenv("BUBU_GRPC_HEARTBEAT_INTERVAL"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } } + return 10 * time.Second // Default: send heartbeat every 10s +} - max := defaultReconnectMaxBackoff - if d := parsePositiveDuration(env.lookup(contracts.GRPCReconnectMaxBackoffEnv)); d > 0 { - max = d +// getHangTimeout returns the timeout for detecting connection hangs +func getHangTimeout() time.Duration { + if v := os.Getenv("BUBU_GRPC_HANG_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d + } } + return 30 * time.Second // Default: 30s without any message = hang +} - maxRetries := defaultReconnectMaxRetries - if raw := strings.TrimSpace(env.lookup(contracts.GRPCReconnectMaxRetriesEnv)); raw != "" { - n, err := strconv.Atoi(raw) - if err != nil { - slog.Default().Warn("ignoring invalid reconnect max retries env var", - "key", contracts.GRPCReconnectMaxRetriesEnv, "value", raw) - } else if n < 0 { - maxRetries = -1 - } else { - maxRetries = n +// getGracefulShutdownTimeout returns the timeout for graceful shutdown drain phase +func getGracefulShutdownTimeout() time.Duration { + if v := os.Getenv("BUBU_GRPC_GRACEFUL_SHUTDOWN_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return d } } + // Default 20s: leaves 10s margin before Kubernetes SIGKILL (default terminationGracePeriodSeconds=30s) + // This provides adequate time for gRPC GracefulStop() to complete even under load. + // Operators should tune both this value and terminationGracePeriodSeconds together. + return 20 * time.Second +} + +// isHeartbeat checks if a DataPacket is a heartbeat message +func isHeartbeat(meta map[string]string) bool { + const heartbeatTrue = "true" + return meta != nil && meta["bubu-heartbeat"] == heartbeatTrue +} - return reconnectPolicy{ - base: base, - max: max, - maxRetries: maxRetries, +// createHeartbeat creates a heartbeat DataPacket +func createHeartbeat() *bobravozgrpcproto.DataPacket { + return &bobravozgrpcproto.DataPacket{ + Metadata: map[string]string{"bubu-heartbeat": "true"}, + Payload: &structpb.Struct{}, } } -func parsePositiveDuration(raw string) time.Duration { - raw = strings.TrimSpace(raw) - if raw == "" { - return 0 +// server is the gRPC server implementation for the SDK sidecar. +type server struct { + bobravozgrpcproto.UnimplementedHubServiceServer + handler func(ctx context.Context, in <-chan engram.StreamMessage, out chan<- engram.StreamMessage) error + activeStreams sync.WaitGroup // Tracks active Process() handlers for graceful shutdown coordination +} + +// clientMessageBuffer is a bounded in-memory buffer of DataPackets for retry after transient send failures. +// It mirrors hub-side semantics: bounded by message count and total bytes; drops on overflow with reason. +type clientMessageBuffer struct { + mu sync.Mutex + messages []*bobravozgrpcproto.DataPacket + totalBytes int + maxMsgs int + maxBytes int + // metrics context + ctx context.Context +} + +func newClientMessageBufferWithContext(ctx context.Context) *clientMessageBuffer { + maxMsgs := getEnvInt("BUBU_GRPC_CLIENT_BUFFER_MAX_MESSAGES", DefaultClientBufferMaxMessages) + if maxMsgs <= 0 { + maxMsgs = DefaultClientBufferMaxMessages + } + maxBytes := getEnvBytes("BUBU_GRPC_CLIENT_BUFFER_MAX_BYTES", DefaultClientBufferMaxBytes) + if maxBytes <= 0 { + maxBytes = DefaultClientBufferMaxBytes + } + b := &clientMessageBuffer{ + messages: make([]*bobravozgrpcproto.DataPacket, 0, maxMsgs), + maxMsgs: maxMsgs, + maxBytes: maxBytes, + ctx: ctx, + } + // Register gauges: provide safe callbacks that lock and read size/bytes + metrics.RegisterClientBufferGauges( + func() float64 { + b.mu.Lock() + defer b.mu.Unlock() + return float64(len(b.messages)) + }, + func() float64 { + b.mu.Lock() + defer b.mu.Unlock() + return float64(b.totalBytes) + }, + ) + return b +} + +// newClientMessageBuffer is retained for backward compatibility in tests. +// It defaults metrics context to Background; production paths should prefer newClientMessageBufferWithContext. +func newClientMessageBuffer() *clientMessageBuffer { + return newClientMessageBufferWithContext(context.Background()) +} + +// add attempts to add; returns true if added, false if dropped (overflow or oversize) +func (b *clientMessageBuffer) add(p *bobravozgrpcproto.DataPacket) bool { + b.mu.Lock() + defer b.mu.Unlock() + size := pbproto.Size(p) + if size > b.maxBytes { + metrics.RecordClientBufferDrop(b.ctx, "oversize") + return false } - d, err := time.ParseDuration(raw) - if err != nil || d <= 0 { + if len(b.messages) >= b.maxMsgs || b.totalBytes+size > b.maxBytes { + metrics.RecordClientBufferDrop(b.ctx, "buffer_full") + return false + } + b.messages = append(b.messages, p) + b.totalBytes += size + return true +} + +// flush tries to send all buffered messages using the provided send function; keeps failed tail +func (b *clientMessageBuffer) flush(ctx context.Context, send func(*bobravozgrpcproto.DataPacket) error) int { + b.mu.Lock() + defer b.mu.Unlock() + if len(b.messages) == 0 { return 0 } - return d + flushed := 0 + remaining := make([]*bobravozgrpcproto.DataPacket, 0, len(b.messages)) + for _, msg := range b.messages { + if ctx.Err() != nil { + remaining = append(remaining, msg) + continue + } + if err := send(msg); err != nil { + remaining = append(remaining, msg) + break + } + flushed++ + b.totalBytes -= pbproto.Size(msg) + } + b.messages = remaining + return flushed } -const ( - controlBindingMetadataKey = "bubu.transport.binding" - defaultControlHeartbeatInterval = 30 * time.Second - // defaultPublishHeartbeatInterval keeps the publish stream alive when the - // engram has nothing to output. Must be shorter than the connector's - // MessageTimeout (default 30 s) so the server-side RecvWithTimeout never - // fires DeadlineExceeded on an idle-but-healthy publish stream. - defaultPublishHeartbeatInterval = 10 * time.Second -) +// Process is the gRPC bidirectional streaming endpoint with transparent heartbeat support. +// Heartbeats are sent/received automatically and filtered from the user's handler, +// eliminating the need for per-message timeout goroutines while detecting connection hangs. +// +// Graceful shutdown: On context cancellation (e.g., SIGTERM), this method: +// 1. Closes the input channel to signal EOF to the user handler. +// 2. Waits for the user handler to close the output channel (indicates flush complete). +// 3. Drains remaining messages from output and sends via gRPC. +// 4. Returns after drain completes or timeout (BUBU_GRPC_GRACEFUL_SHUTDOWN_TIMEOUT). +func (s *server) Process(stream bobravozgrpcproto.HubService_ProcessServer) error { + // Track this stream for graceful shutdown coordination + s.activeStreams.Add(1) + defer s.activeStreams.Done() -// StartStreamServer boots a StreamingEngram using the new transport connector contract. -// The Engram must have BUBU_TRANSPORT_BINDING set; no other transport modes are supported. -func StartStreamServer[C any](ctx context.Context, e engram.StreamingEngram[C]) error { - ctx, _ = withDefaultLogger(ctx) - defer publishCapturedLogs(ctx) - logger := LoggerFromContext(ctx) - logger.Info("Initializing Bubu SDK for streaming execution") + streamCtx := stream.Context() + ctx, cancelCtx := context.WithCancel(streamCtx) + defer cancelCtx() - config, secrets, err := loadStreamingExecutionContext[C](ctx) - if err != nil { + // Serialize all writes to the gRPC stream to avoid concurrent Send hazards. + // gRPC streams are not safe for concurrent Send() from multiple goroutines. + var sendMu sync.Mutex + + // Buffer channels to reduce risk of blocking when one side stops first + bufSize := getEnvInt("BUBU_GRPC_CHANNEL_BUFFER_SIZE", DefaultChannelBufferSize) + in := make(chan engram.StreamMessage, bufSize) + out := make(chan engram.StreamMessage, bufSize) + + g, gctx := errgroup.WithContext(ctx) + + // Graceful shutdown coordination + // shutdownInitiated channel removed; drainOnShutdown handles coordination + handlerDone := make(chan struct{}) + shutdownTimeout := getGracefulShutdownTimeout() + + // Heartbeat sender: sends periodic heartbeats to keep connection alive + // and allow remote side to detect if we're hung + g.Go(func() error { return heartbeatLoop(gctx, stream, &sendMu) }) + + // Reader: from gRPC → user handler (with heartbeat filtering and hang detection) + g.Go(func() error { return readLoop(gctx, stream, in) }) + + // Writer: from user handler → gRPC (simple, no timeout goroutines!) + g.Go(func() error { return writeLoop(gctx, stream, out, &sendMu) }) + + // User handler - unchanged API + g.Go(func() error { + defer close(out) + defer close(handlerDone) + err := s.handler(gctx, in, out) return err - } + }) - if err := callWithPanicRecoveryNoValue("streaming engram Init", func() error { - return e.Init(ctx, config, secrets) - }); err != nil { - return fmt.Errorf("streaming engram initialization failed: %w", err) - } + // Graceful shutdown coordinator: on context cancellation, drain pending messages + g.Go(func() error { return drainOnShutdown(gctx, out, stream, &sendMu, shutdownTimeout, handlerDone) }) - ref, err := bindingReferenceFromEnv() - if err != nil { + if err := g.Wait(); err != nil { + cancelCtx() return err } - if ref.Info == nil { - return fmt.Errorf("transport binding missing inline payload") + return nil +} + +// StartStreamServer is the main entry point for a StreamingEngram. This function +// bootstraps a long-running service that can process data in real-time over gRPC. +// +// This function orchestrates the lifecycle of a streaming service: +// 1. It loads the execution context for configuration and secrets. +// 2. It calls the StreamingEngram's `Init` method. +// 3. It starts a gRPC server on the configured port. +// 4. It registers the StreamingEngram's `Stream` method as the gRPC handler. +// 5. It gracefully handles server shutdown on context cancellation. +// +// # Streaming Delivery Guarantees +// +// The SDK provides reliable message delivery for direct engram-to-engram connections (peer-to-peer mode). +// In hub-and-spoke mode (primitives between streaming engrams), the Hub may drop messages if downstream +// engrams are not ready at the time of forwarding. For production use cases requiring guaranteed delivery: +// - Use peer-to-peer mode (avoid primitives between streaming engrams), OR +// - Implement application-level acknowledgment and retry in your engram logic, OR +// - Wait for Hub buffering support (tracked in bobravoz-grpc roadmap) +func StartStreamServer[C any](ctx context.Context, e engram.StreamingEngram[C]) error { + LoggerFromContext(ctx).Info("Initializing Bubu SDK for streaming execution...") + + execCtxData, err := runtime.LoadExecutionContextData() + if err != nil { + return fmt.Errorf("failed to load execution context: %w", err) } - endpoint := strings.TrimSpace(ref.endpoint()) - if endpoint == "" { - return fmt.Errorf("transport binding missing endpoint") + + // Unmarshal config. + config, err := runtime.UnmarshalFromMap[C](execCtxData.Config) + if err != nil { + return fmt.Errorf("failed to unmarshal config: %w", err) } + secrets := engram.NewSecrets(execCtxData.Secrets) - logger.Info("Connecting to transport connector", "endpoint", endpoint, "driver", normalizedDriver(ref)) - return runTransportConnectorStream(ctx, endpoint, ref, e) -} + if err := e.Init(ctx, config, secrets); err != nil { + return fmt.Errorf("streaming engram initialization failed: %w", err) + } -func runTransportConnectorStream[C any]( - ctx context.Context, - endpoint string, - ref bindingReference, - e engram.StreamingEngram[C], -) error { - env := newEnvResolver(ref.envOverrides()) - return runTransportConnectorStreamWithDeps( - ctx, - endpoint, - ref, - e, - env, - runTransportSession[C], - sleepWithContext, - jitterReconnectDelay, - ) -} + // Controller sets BUBU_GRPC_PORT + port := os.Getenv("BUBU_GRPC_PORT") + if port == "" { + port = DefaultGRPCPort + } -func runTransportConnectorStreamWithDeps[C any]( - ctx context.Context, - endpoint string, - ref bindingReference, - e engram.StreamingEngram[C], - env envResolver, - sessionFn transportSessionRunner[C], - sleepFn reconnectSleepFunc, - delayFn reconnectDelayFunc, -) error { - if sessionFn == nil { - sessionFn = runTransportSession[C] + lis, err := net.Listen("tcp", fmt.Sprintf(":%s", port)) + if err != nil { + return fmt.Errorf("failed to listen: %w", err) } - if sleepFn == nil { - sleepFn = sleepWithContext + + // gRPC server options + var opts []grpc.ServerOption + // Message size limits (bytes) + maxRecv := getEnvBytes("BUBU_GRPC_MAX_RECV_BYTES", DefaultMaxMessageSize) + if maxRecv > 0 { + opts = append(opts, grpc.MaxRecvMsgSize(maxRecv)) } - if delayFn == nil { - delayFn = jitterReconnectDelay + maxSend := getEnvBytes("BUBU_GRPC_MAX_SEND_BYTES", DefaultMaxMessageSize) + if maxSend > 0 { + opts = append(opts, grpc.MaxSendMsgSize(maxSend)) } - bufferSize := resolveChannelBufferSize(env) - baseSessionCtx := withPacketDeduperContext(ctx, newPacketDeduper(defaultPacketDedupeEntries)) - policy := resolveReconnectPolicy(env) - backoff := policy.base - if backoff <= 0 { - backoff = time.Second - } - retriesRemaining := policy.maxRetries - for { - if ctx.Err() != nil { - return ctx.Err() + // Optional TLS + certFile := os.Getenv("BUBU_GRPC_TLS_CERT_FILE") + keyFile := os.Getenv("BUBU_GRPC_TLS_KEY_FILE") + if certFile != "" && keyFile != "" { + creds, err := credentials.NewServerTLSFromFile(certFile, keyFile) + if err != nil { + return fmt.Errorf("failed to load TLS certs: %w", err) } + opts = append(opts, grpc.Creds(creds)) + } - // Use a fresh internal control queue per reconnect attempt so stale - // requests from a failed session cannot leak into the next session. - sessionCtx := withControlRequestQueueContext(baseSessionCtx, make(chan *transportpb.ControlRequest, bufferSize)) - err := sessionFn(sessionCtx, endpoint, ref, e, env) - if err == nil { - return nil - } - if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { - return err - } - if !isRetriableTransportSessionError(err) { - return err - } + grpcServer := grpc.NewServer(opts...) + svcImpl := &server{handler: e.Stream} + bobravozgrpcproto.RegisterHubServiceServer(grpcServer, svcImpl) - LoggerFromContext(ctx).Error("Transport connector session ended; retrying", "error", err) + LoggerFromContext(ctx).Info("gRPC server listening", "addr", lis.Addr()) + serveErr := make(chan error, 1) + go func() { + serveErr <- grpcServer.Serve(lis) + }() - if retriesRemaining >= 0 { - retriesRemaining-- - if retriesRemaining < 0 { - return fmt.Errorf("transport connector retries exhausted: %w", err) - } - } + select { + case <-ctx.Done(): + logger := LoggerFromContext(ctx) + logger.Info("Context canceled, initiating graceful shutdown") - wait := delayFn(backoff, policy.max) - if err := sleepFn(ctx, wait); err != nil { - return err - } + // Give active streams time to drain before stopping server + // Wait for all active Process() handlers to complete their drain phase + shutdownTimer := time.NewTimer(getGracefulShutdownTimeout() + 2*time.Second) + defer shutdownTimer.Stop() + + // Wait for active streams with timeout protection + drainComplete := make(chan struct{}) + go func() { + svcImpl.activeStreams.Wait() + close(drainComplete) + }() - backoff = nextReconnectBackoff(backoff, policy.max) + select { + case <-drainComplete: + logger.Info("All active streams drained, stopping server") + case <-shutdownTimer.C: + logger.Warn("Graceful shutdown timeout reached, forcing server stop", + "timeout", getGracefulShutdownTimeout()+2*time.Second, + "note", "Some streams may not have completed drain; messages may be dropped") + // Force stop even if streams haven't completed drain + // This prevents indefinite hang but may drop messages + } + + grpcServer.GracefulStop() + logger.Info("gRPC server stopped") + return ctx.Err() + case err := <-serveErr: + return err } } -func runTransportSession[C any]( //nolint:gocyclo +// StreamTo connects to a downstream gRPC server and streams data to it (client side). +// +// This function provides a simplified []byte channel API for backward compatibility. +// It wraps StreamToWithMetadata, converting []byte channels to StreamMessage channels +// with empty metadata and inputs fields. +// +// For new code that requires tracing, correlation, or dynamic per-message configuration, +// use StreamToWithMetadata directly. +// +// The function implements: +// - Automatic reconnection on transient failures (configurable via BUBU_GRPC_RECONNECT_MAX_RETRIES) +// - Exponential backoff with jitter (base/max configurable via env) +// - Transparent heartbeat sending/receiving to detect connection hangs +// - Optional TLS via BUBU_GRPC_CA_FILE or BUBU_GRPC_CLIENT_TLS=true +// - Backpressure handling with configurable timeouts +// +// Blocks until the input channel is closed, context is canceled, or a permanent error occurs. +// Respects context cancellation for graceful shutdown. +// +// Example: +// +// in := make(chan []byte, 16) +// out := make(chan []byte, 16) +// +// go func() { +// defer close(in) +// in <- []byte(`{"key": "value"}`) +// }() +// +// go func() { +// for msg := range out { +// log.Printf("Received: %s", msg) +// } +// }() +// +// if err := sdk.StreamTo(ctx, "downstream-service:50051", in, out); err != nil { +// return fmt.Errorf("streaming failed: %w", err) +// } +func StreamTo( ctx context.Context, - endpoint string, - ref bindingReference, - e engram.StreamingEngram[C], - env envResolver, -) (err error) { - sessionCtx, cancel := context.WithCancel(ctx) - - logger := LoggerFromContext(ctx) - sendTracker := newTimedSendTracker() - defer func() { - if cleanupErr := waitForTimedSendCleanup(logger, sendTracker, defaultTimedSendCleanupWait); cleanupErr != nil { - if err == nil { - err = cleanupErr - return - } - err = errors.Join(err, cleanupErr) + target string, + in <-chan []byte, + out chan<- []byte, +) error { + // Allow operator-provided env-based wiring when target is empty + if target == "" { + if v := os.Getenv("DOWNSTREAM_HOST"); v != "" { + target = v + } else if v := os.Getenv("UPSTREAM_HOST"); v != "" { + target = v } - }() + } + // Convert []byte channels to StreamMessage channels for backward compatibility + inMsg := make(chan engram.StreamMessage, DefaultChannelBufferSize) + outMsg := make(chan engram.StreamMessage, DefaultChannelBufferSize) + + // Use a derived context to coordinate converter goroutines and stream lifecycle + streamCtx, cancel := context.WithCancel(ctx) defer cancel() - conn, err := connectorDial(sessionCtx, endpoint, env) - if err != nil { - return err - } - if isDebugEnabled() { - logger.Debug("Transport connector session established", - slog.String("endpoint", endpoint), - slog.String("driver", normalizedDriver(ref)), - ) - } - defer func() { - if closeErr := conn.Close(); closeErr != nil { - logger.Warn("Failed to close transport connector", "error", closeErr) - } - }() + spawnBytesToMessageConverter(streamCtx, in, inMsg) + spawnMessageToBytesConverter(streamCtx, outMsg, out) - bufferSize := resolveChannelBufferSize(env) - in := make(chan engram.InboundMessage, bufferSize) - out := make(chan engram.StreamMessage, bufferSize) - controlRequests := controlRequestQueueFromContext(sessionCtx) - if controlRequests == nil { - controlRequests = make(chan *transportpb.ControlRequest, bufferSize) - } - - messageTimeout := resolveMessageTimeout(env) - opts := streamRuntimeOptions{ - messageTimeout: messageTimeout, - publishHeartbeatInterval: resolvePublishHeartbeatInterval(env, messageTimeout), - channelSendTimeout: resolveChannelSendTimeout(env), - controlHeartbeatInterval: resolveControlHeartbeatInterval(env), - packetDeduper: packetDeduperFromContext(sessionCtx), - controlRequests: controlRequests, - sendTracker: sendTracker, - startupHandshake: newControlStartupHandshake(), - } - if hangTimeout := resolveHangTimeout(env); hangTimeout > 0 { - opts.hangWatcher = newHangWatcher(sessionCtx, hangTimeout, cancel) - defer opts.hangWatcher.Stop() - } - - // Open a single bidirectional Data stream for both sending and receiving. - dataCtx, dataCancel := context.WithCancel(sessionCtx) - defer dataCancel() - dataCtx = metadata.NewOutgoingContext(dataCtx, metadata.Pairs( - coretransport.ProtocolMetadataKey, coretransport.ProtocolVersion, - )) - dataStream, err := conn.Client().Data(dataCtx) - if err != nil { - return fmt.Errorf("data stream open failed: %w", err) - } + // Run the metadata-aware stream with coordinated context + err := StreamToWithMetadata(streamCtx, target, inMsg, outMsg) + // Ensure the outbound converter terminates even if the underlying stream never closes it + close(outMsg) + // Cancel converters waiting on context/selects + cancel() + return err +} - g, gctx := errgroup.WithContext(sessionCtx) - var streamCompletedGracefully atomic.Bool - var primarySessionErr struct { - sync.Mutex - err error - } - recordPrimarySessionErr := func(candidate error) { - if candidate == nil { - return - } - if errors.Is(candidate, context.Canceled) && !errors.Is(candidate, io.EOF) && - !errors.Is(candidate, errTimedSendCleanupTimeout) && - !errors.Is(candidate, errControlStartupHandshakeTimeout) { - return - } - if errors.Is(candidate, context.DeadlineExceeded) { - return - } - primarySessionErr.Lock() - defer primarySessionErr.Unlock() - if primarySessionErr.err == nil { - primarySessionErr.err = candidate +// attemptResult encapsulates the outcome of a streaming attempt +type attemptResult struct { + err error + success bool // true if attempt completed successfully +} + +// clientRunConfig captures shared parameters for a single streaming attempt. +type clientRunConfig struct { + ctx context.Context + target string + dialOpts []grpc.DialOption + mdPairs []string + buffer *clientMessageBuffer + in <-chan engram.StreamMessage + out chan<- engram.StreamMessage +} + +// makeRPCContext constructs a per-attempt RPC context using env-configured timeouts. +func makeRPCContext(callCtx context.Context) (context.Context, context.CancelFunc) { + if v := os.Getenv("BUBU_GRPC_STREAM_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + return context.WithTimeout(callCtx, d) } + return context.WithCancel(callCtx) } - loadPrimarySessionErr := func() error { - primarySessionErr.Lock() - defer primarySessionErr.Unlock() - return primarySessionErr.err + defaultD := 30 * time.Second + if v := os.Getenv("BUBU_HUB_PER_MESSAGE_TIMEOUT"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + defaultD = d + } } - handler := controlDirectiveHandler(e) - g.Go(func() error { - logger.Debug("SDK: Starting Control loop goroutine") - err := connectorControlLoop(gctx, conn.Client(), ref, handler, opts) - recordPrimarySessionErr(err) - cancel() - return err - }) - if err := opts.startupHandshake.Wait(sessionCtx, messageTimeout); err != nil { - cancel() - groupErr := g.Wait() - if groupErr != nil && !errors.Is(groupErr, context.Canceled) { - return groupErr + return context.WithTimeout(callCtx, defaultD) +} + +// flushClientBuffer attempts to flush any buffered messages before normal send path resumes. +func flushClientBuffer( + ctx context.Context, + cfg *clientRunConfig, + stream bobravozgrpcproto.HubService_ProcessClient, + sendMu *sync.Mutex, +) { + if flushed := cfg.buffer.flush(ctx, func(p *bobravozgrpcproto.DataPacket) error { + sendMu.Lock() + defer sendMu.Unlock() + return stream.Send(&bobravozgrpcproto.ProcessRequest{Packet: p}) + }); flushed > 0 { + metrics.RecordClientBufferFlush(ctx, flushed) + } +} + +// clientHeartbeatLoop periodically sends heartbeats on the client stream. +func clientHeartbeatLoop( + gCtx context.Context, + stream bobravozgrpcproto.HubService_ProcessClient, + sendMu *sync.Mutex, +) error { + interval := getHeartbeatInterval() + ticker := time.NewTicker(interval) + defer ticker.Stop() + for { + select { + case <-gCtx.Done(): + return gCtx.Err() + case <-ticker.C: + heartbeat := createHeartbeat() + select { + case <-gCtx.Done(): + return gCtx.Err() + default: + sendMu.Lock() + err := stream.Send(&bobravozgrpcproto.ProcessRequest{Packet: heartbeat}) + sendMu.Unlock() + if err != nil { + return fmt.Errorf("heartbeat send failed: %w", err) + } + } } - return err } - g.Go(func() error { - defer close(in) - logger.Debug("SDK: Starting Data recv loop goroutine") - recvErr := connectorDataRecvLoop(gctx, dataStream, in, opts) - if recvErr == nil { - // A closed Data recv stream with no session cancellation leaves the - // session half-open (control/send loops still running). Surface this - // as a retriable session failure so reconnect logic can recover. - if err := gctx.Err(); err != nil { - cancel() - return err +} + +// clientSenderLoop forwards messages from cfg.in to the gRPC stream with buffering semantics. +func clientSenderLoop( + gCtx context.Context, + cfg *clientRunConfig, + stream bobravozgrpcproto.HubService_ProcessClient, + sendMu *sync.Mutex, +) error { + for { + select { + case <-gCtx.Done(): + return stream.CloseSend() + case msg, ok := <-cfg.in: + if !ok { + return stream.CloseSend() + } + packet, err := buildPacketFromMsg(msg) + if err != nil { + return fmt.Errorf("convert message: %w", err) + } + if cfg.buffer.flush(gCtx, func(p *bobravozgrpcproto.DataPacket) error { + sendMu.Lock() + defer sendMu.Unlock() + return stream.Send(&bobravozgrpcproto.ProcessRequest{Packet: p}) + }) > 0 { + // Preserve existing metric behavior + metrics.RecordClientBufferFlush(gCtx, 0) + } + sendMu.Lock() + err = stream.Send(&bobravozgrpcproto.ProcessRequest{Packet: packet}) + sendMu.Unlock() + if err != nil { + if cfg.buffer.add(packet) { + metrics.RecordClientBufferAdded(gCtx, "transient_error") + } + return fmt.Errorf("send on stream: %w", err) } - cancel() - err := fmt.Errorf("data stream recv failed: %w", io.EOF) - recordPrimarySessionErr(err) - return err } - recordPrimarySessionErr(recvErr) - cancel() - return recvErr - }) - g.Go(func() error { - logger.Debug("SDK: Starting Data send loop goroutine") - err := connectorDataSendLoop(gctx, dataStream, out, opts) - recordPrimarySessionErr(err) + } +} + +// buildMessageFromPacket converts a DataPacket to StreamMessage bytes. +func buildMessageFromPacket(p *bobravozgrpcproto.DataPacket) (engram.StreamMessage, error) { + payloadBytes, err := protojson.Marshal(p.Payload) + if err != nil { + return engram.StreamMessage{}, fmt.Errorf("marshal payload from stream: %w", err) + } + var inputsBytes []byte + if p.Inputs != nil { + inputsBytes, err = protojson.Marshal(p.Inputs) if err != nil { - cancel() - } - return err - }) - g.Go(func() error { - defer close(out) - logger.Debug("SDK: Starting Engram Stream goroutine - calling e.Stream()") - err := callWithPanicRecoveryNoValue("streaming engram Stream", func() error { - return e.Stream(gctx, in, out) - }) - recordPrimarySessionErr(err) - if err == nil { - streamCompletedGracefully.Store(true) + return engram.StreamMessage{}, fmt.Errorf("marshal inputs from stream: %w", err) } - cancel() - logger.Debug("SDK: Engram Stream goroutine exited", "error", err) - return err - }) + } + return engram.StreamMessage{Metadata: p.Metadata, Payload: payloadBytes, Inputs: inputsBytes}, nil +} - logger.Debug("SDK: All goroutines started, waiting for completion") - err = g.Wait() - logger.Debug("SDK: All goroutines completed", "error", err) - if streamCompletedGracefully.Load() && errors.Is(err, context.Canceled) { +// offerStreamMessageWithBackpressure tries to send with a timeout to avoid deadlocks. +func offerStreamMessageWithBackpressure( + gCtx context.Context, + ch chan<- engram.StreamMessage, + msg engram.StreamMessage, +) error { + select { + case <-gCtx.Done(): + return gCtx.Err() + case ch <- msg: return nil - } - if errors.Is(err, context.Canceled) { - if primary := loadPrimarySessionErr(); primary != nil { - return primary + default: + timer := time.NewTimer(getBackpressureTimeout()) + defer func() { + if !timer.Stop() { + <-timer.C + } + }() + select { + case <-gCtx.Done(): + return gCtx.Err() + case ch <- msg: + return nil + case <-timer.C: + return fmt.Errorf("timeout delivering message to caller: output channel not drained") } } - return err } -func connectorDataRecvLoop( //nolint:gocyclo - ctx context.Context, - stream transportpb.TransportConnectorService_DataClient, - in chan<- engram.InboundMessage, - opts streamRuntimeOptions, +// clientReceiverLoop reads from gRPC and forwards to cfg.out, filtering heartbeats and monitoring hangs. +func clientReceiverLoop( + gCtx context.Context, + cfg *clientRunConfig, + stream bobravozgrpcproto.HubService_ProcessClient, ) error { - logger := LoggerFromContext(ctx) - logger.Debug("SDK: Data recv loop started, waiting for packets from connector") - reassembler := newDataChunkReassembler(defaultChunkReassemblyTTL, 0, 0) - defer reassembler.Stop() - deduper := opts.packetDeduper - if deduper == nil { - deduper = newPacketDeduper(defaultPacketDedupeEntries) - } - deduper.StartSession() - packetCount := 0 + var lastRecvNano atomic.Int64 + lastRecvNano.Store(time.Now().UnixNano()) + hangTimeout := getHangTimeout() for { - // Do NOT use RecvWithTimeout here — Data streams can be idle for long - // stretches. A hard timeout kills the stream and cascades cancellations. - // Liveness is handled by gRPC keepalive and the optional hang watcher. + lastNano := lastRecvNano.Load() + if time.Since(time.Unix(0, lastNano)) > hangTimeout { + return fmt.Errorf("connection hang detected: no messages for %v", hangTimeout) + } resp, err := stream.Recv() if err == io.EOF { - logger.Debug("SDK: Data stream closed by connector", "packetsReceived", packetCount) return nil } if err != nil { - logger.Error("SDK: Data receive failed", "error", err, "packetsReceived", packetCount) - return fmt.Errorf("data receive failed: %w", err) + return fmt.Errorf("recv from stream: %w", err) } - // Heartbeat/keepalive packets have no frame — skip them. - if resp.GetFrame() == nil { - queueDownstreamDeliveryReceipt(ctx, opts, dataResponseToPublishRequest(resp)) - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } + lastRecvNano.Store(time.Now().UnixNano()) + if isHeartbeat(resp.Packet.Metadata) { continue } - assembled, complete, err := reassembler.Add(dataResponseToPublishRequest(resp)) + msg, err := buildMessageFromPacket(resp.Packet) if err != nil { - logger.Error("SDK: Chunk reassembly failed", "error", err) return err } - if !complete { - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - continue - } - status, key, generation := deduper.Begin(assembled) //nolint:revive - switch status { - case packetDuplicatePending: - logger.Debug("SDK: Dropping duplicate packet still pending processing", "key", key) - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - continue - case packetDuplicateCompleted: - logger.Debug("SDK: Dropping completed duplicate packet from Data stream", "key", key) - queueDownstreamDeliveryReceipt(ctx, opts, assembled) - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - continue - case packetPendingOverflow: - return fmt.Errorf("%w: limit=%d", errPacketDeduperPendingOverflow, deduper.maxEntries) - } - packetCount++ - logger.Debug("SDK: Received packet from connector Data stream", "packetCount", packetCount) - msg, err := publishRequestToStreamMessage(assembled) - if err != nil { - logger.Error("SDK: Failed to translate packet", "error", err) + // Backpressure-aware deliver + if err := offerStreamMessageWithBackpressure(gCtx, cfg.out, msg); err != nil { return err } - if drop, reason := shouldDropSubscribeMessage(msg); drop { - logger.Debug("SDK: Dropping packet from Data stream", "reason", reason) - receipt := downstreamDeliveryReceiptControlRequest(assembled) - if receipt == nil || queueControlRequest(ctx, opts.controlRequests, receipt) { - deduper.Complete(key, generation) - } else { - deduper.Release(key, generation) - } - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - continue + } +} + +func runClientAttempt(cfg *clientRunConfig) attemptResult { + conn, err := grpc.NewClient(cfg.target, cfg.dialOpts...) + if err != nil { + return attemptResult{err: err} + } + defer func() { _ = conn.Close() }() + + // Outgoing context with metadata if available + callCtx := cfg.ctx + if len(cfg.mdPairs) > 0 { + callCtx = metadata.NewOutgoingContext(callCtx, metadata.Pairs(cfg.mdPairs...)) + } + + // RPC context per attempt with optional deadline + rpcCtx, rpcCancel := makeRPCContext(callCtx) + defer rpcCancel() + + client := bobravozgrpcproto.NewHubServiceClient(conn) + stream, err := client.Process(rpcCtx) + if err != nil { + return attemptResult{err: err} + } + + g, gCtx := errgroup.WithContext(rpcCtx) + var sendMu sync.Mutex + + g.Go(func() error { + <-gCtx.Done() + rpcCancel() + return nil + }) + + // Flush buffered messages accumulated from previous attempts + flushClientBuffer(rpcCtx, cfg, stream, &sendMu) + + // Heartbeat sender + g.Go(func() error { return clientHeartbeatLoop(gCtx, stream, &sendMu) }) + + // Sender: from user → gRPC + g.Go(func() error { return clientSenderLoop(gCtx, cfg, stream, &sendMu) }) + + // Receiver: gRPC → user + g.Go(func() error { return clientReceiverLoop(gCtx, cfg, stream) }) + + if err := g.Wait(); err != nil { + return attemptResult{err: err} + } + return attemptResult{success: true} +} + +// StreamToWithMetadata connects to a downstream gRPC server with full metadata and inputs support (client side). +// +// This function provides the full StreamMessage API, enabling: +// - Metadata propagation for tracing (StoryRunID, StepName, custom trace IDs) +// - Per-message dynamic configuration via the Inputs field (analogous to BUBU_INPUTS in batch mode) +// - End-to-end correlation across streaming pipeline steps +// +// The SDK automatically injects Hub metadata (storyrun-name, storyrun-namespace, current-step-id) +// from the execution context if available, enabling interop with the bobravoz Hub. +// +// The function implements: +// - Automatic reconnection on transient failures (Unavailable, ResourceExhausted, Aborted, DeadlineExceeded) +// - Exponential backoff with jitter (configurable via BUBU_GRPC_RECONNECT_BASE_BACKOFF and _MAX_BACKOFF) +// - Transparent heartbeat sending/filtering to detect connection hangs (BUBU_GRPC_HANG_TIMEOUT) +// - Optional TLS via BUBU_GRPC_CA_FILE (custom CA) or BUBU_GRPC_CLIENT_TLS=true (system roots) +// - Backpressure handling with timeouts (BUBU_GRPC_CHANNEL_SEND_TIMEOUT or BUBU_GRPC_MESSAGE_TIMEOUT) +// - Configurable message size limits (BUBU_GRPC_CLIENT_MAX_RECV_BYTES, BUBU_GRPC_CLIENT_MAX_SEND_BYTES) +// +// Blocks until the input channel is closed, context is canceled, or a permanent error occurs. +// Respects context cancellation for graceful shutdown. +// +// Example: +// +// in := make(chan engram.StreamMessage, 16) +// out := make(chan engram.StreamMessage, 16) +// +// go func() { +// defer close(in) +// in <- engram.StreamMessage{ +// Metadata: map[string]string{"trace-id": "abc123"}, +// Payload: []byte(`{"key": "value"}`), +// Inputs: []byte(`{"configKey": "configValue"}`), +// } +// }() +// +// go func() { +// for msg := range out { +// log.Printf("Received: %s (trace: %s)", msg.Payload, msg.Metadata["trace-id"]) +// } +// }() +// +// if err := sdk.StreamToWithMetadata(ctx, "downstream:50051", in, out); err != nil { +// return fmt.Errorf("streaming failed: %w", err) +// } +func StreamToWithMetadata( + ctx context.Context, + target string, + in <-chan engram.StreamMessage, + out chan<- engram.StreamMessage, +) error { + // Allow operator-provided env-based wiring when target is empty + if target == "" { + if v := os.Getenv("DOWNSTREAM_HOST"); v != "" { + target = v + } else if v := os.Getenv("UPSTREAM_HOST"); v != "" { + target = v } - logger.Debug("SDK: Translated packet, enqueueing for engram", "packetCount", packetCount) - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() + } + // Dial options (sizes and TLS) + dialOpts, err := buildDialOptionsFromEnv(ctx) + if err != nil { + return err + } + + // Reconnection settings + baseBackoff, maxBackoff, maxRetries := parseReconnectSettingsFromEnv() + + // Attach required Hub metadata for interop if available. + mdPairs := attachHubMetadataPairs() + + // Bounded client buffer that persists across reconnect attempts + buffer := newClientMessageBufferWithContext(ctx) + + // Reconnect loop with a single attempt counter for backoff + attempt := 0 + for { + // Check context before attempting + if ctx.Err() != nil { + return ctx.Err() } - inbound := attachDownstreamProcessingReceipt(ctx, opts, engram.NewInboundMessage(msg), - assembled, deduper, key, generation) - delivered, err := enqueueStreamMessage(ctx, in, inbound, opts.channelSendTimeout) - if err != nil { - logger.Error("SDK: Failed to enqueue packet for engram", "error", err) - return err + + cfg := &clientRunConfig{ + ctx: ctx, + target: target, + dialOpts: dialOpts, + mdPairs: mdPairs, + buffer: buffer, + in: in, + out: out, } - if !delivered { - deduper.Release(key, generation) - logger.Debug("SDK: Dropped packet before engram consumption", "packetCount", packetCount) - continue + // Record a reconnect attempt for any iteration beyond the first attempt + if attempt > 0 { + metrics.RecordStreamReconnectAttempt(ctx) } - logger.Debug("SDK: Packet enqueued for engram successfully", "packetCount", packetCount) - } -} + result := runClientAttempt(cfg) -//nolint:lll,unparam -func queueDownstreamDeliveryReceipt(ctx context.Context, opts streamRuntimeOptions, req *transportpb.PublishRequest) bool { - return queueControlRequest(ctx, opts.controlRequests, downstreamDeliveryReceiptControlRequest(req)) -} + // Handle attempt result + if result.success { + return nil // Successful completion, no reconnect + } -func attachDownstreamProcessingReceipt( - ctx context.Context, - opts streamRuntimeOptions, - msg engram.InboundMessage, - req *transportpb.PublishRequest, - deduper *packetDeduper, - key string, - generation uint64, -) engram.InboundMessage { - receipt := downstreamDeliveryReceiptControlRequest(req) - if receipt == nil && key == "" { - return msg - } - return engram.BindProcessingReceipt(msg, func() { - if receipt == nil { - deduper.Complete(key, generation) - return + if result.err != nil && (!shouldRetry(result.err) || exceeded(attempt, maxRetries)) { + // Terminal failure after attempts + metrics.RecordStreamReconnectFailure(ctx) + return result.err } - if queueControlRequest(ctx, opts.controlRequests, receipt) { - deduper.Complete(key, generation) - return + + // Retryable error: check context before backoff + if ctx.Err() != nil { + return ctx.Err() } - // If the receipt cannot be queued, release the pending entry so a later - // replay can be delivered instead of being suppressed as already handled. - deduper.Release(key, generation) - }) + + // Apply backoff and retry + attempt++ + backoffSleep(ctx, attempt, baseBackoff, maxBackoff) + } } -func queueControlRequest(ctx context.Context, controlRequests chan *transportpb.ControlRequest, req *transportpb.ControlRequest) bool { //nolint:lll - if controlRequests == nil || req == nil { +// shouldRetry determines whether an error is transient and warrants a reconnect. +func shouldRetry(err error) bool { + if err == nil { return false } - if ctx == nil { - ctx = context.Background() + st, ok := status.FromError(err) + if !ok { + // Non-gRPC error: assume retryable for common network errors + msg := err.Error() + return strings.Contains(msg, "connection refused") || + strings.Contains(msg, "transport is closing") || + strings.Contains(msg, "deadline exceeded") } - select { - case <-ctx.Done(): - return false - default: - } - if safeSendControlRequest(ctx, controlRequests, req) { - return true - } - if ctx.Err() == nil { - LoggerFromContext(ctx).Debug("Dropping internal control request because control queue is unavailable", - "action", req.GetCustomAction()) - } - return false -} - -func safeSendControlRequest(ctx context.Context, controlRequests chan *transportpb.ControlRequest, req *transportpb.ControlRequest) (sent bool) { //nolint:lll - defer func() { - if recover() != nil { - sent = false - } - }() - select { - case controlRequests <- req: - return true - case <-ctx.Done(): - return false + switch st.Code() { + case codes.Unavailable, codes.ResourceExhausted, codes.Aborted, codes.DeadlineExceeded: + return true default: return false } } -func downstreamDeliveryReceiptControlRequest(req *transportpb.PublishRequest) *transportpb.ControlRequest { - if req == nil { - return nil - } - env := req.GetEnvelope() - if env == nil || env.GetSequence() == 0 { - return nil - } - streamID := strings.TrimSpace(env.GetStreamId()) - if streamID == "" { - return nil - } - dataReq := publishRequestToDataRequest(req) - if dataReq == nil { - return nil - } - return &transportpb.ControlRequest{ - CustomAction: downstreamDeliveryReceiptType, - Metadata: map[string]string{ - deliveryReceiptStreamIDKey: streamID, - deliveryReceiptSequenceKey: strconv.FormatUint(env.GetSequence(), 10), - deliveryReceiptPartitionKey: strings.TrimSpace(env.GetPartition()), - deliveryReceiptSizeBytesKey: strconv.Itoa(proto.Size(dataReq)), - }, - } -} - -type packetDeduper struct { - mu sync.Mutex - maxEntries int - order []string - completed map[string]struct{} - pending map[string]uint64 - generation uint64 -} - -func newPacketDeduper(maxEntries int) *packetDeduper { - if maxEntries <= 0 { - maxEntries = defaultPacketDedupeEntries - } - return &packetDeduper{ - maxEntries: maxEntries, - order: make([]string, 0, maxEntries), - completed: make(map[string]struct{}, maxEntries), - pending: make(map[string]uint64, maxEntries), - } -} - -func withPacketDeduperContext(ctx context.Context, deduper *packetDeduper) context.Context { - if ctx == nil || deduper == nil { - return ctx - } - return context.WithValue(ctx, packetDeduperContextKey{}, deduper) -} - -func packetDeduperFromContext(ctx context.Context) *packetDeduper { - if ctx == nil { - return nil - } - deduper, _ := ctx.Value(packetDeduperContextKey{}).(*packetDeduper) - return deduper -} - -func withControlRequestQueueContext(ctx context.Context, queue chan *transportpb.ControlRequest) context.Context { - if ctx == nil || queue == nil { - return ctx - } - return context.WithValue(ctx, controlRequestQueueContextKey{}, queue) -} - -func controlRequestQueueFromContext(ctx context.Context) chan *transportpb.ControlRequest { - if ctx == nil { - return nil - } - queue, _ := ctx.Value(controlRequestQueueContextKey{}).(chan *transportpb.ControlRequest) - return queue -} - -type packetDuplicateStatus int - -const ( - packetNew packetDuplicateStatus = iota - packetDuplicatePending - packetDuplicateCompleted - packetPendingOverflow -) - -func (d *packetDeduper) StartSession() { - if d == nil { - return - } - d.mu.Lock() - defer d.mu.Unlock() - d.generation++ - clear(d.pending) -} - -func (d *packetDeduper) Begin(req *transportpb.PublishRequest) (packetDuplicateStatus, string, uint64) { - if d == nil { - return packetNew, "", 0 - } - key := dedupeKeyForPacket(req) - if key == "" { - return packetNew, "", 0 - } - d.mu.Lock() - defer d.mu.Unlock() - generation := d.generation - if pendingGeneration, ok := d.pending[key]; ok && pendingGeneration == generation { - return packetDuplicatePending, key, generation - } - if _, ok := d.completed[key]; ok { - return packetDuplicateCompleted, key, generation - } - if d.maxEntries > 0 && len(d.pending) >= d.maxEntries { - return packetPendingOverflow, key, generation +func exceeded(attempt, maxRetries int) bool { + if maxRetries == 0 { + return false } - d.pending[key] = generation - return packetNew, key, generation + return attempt >= maxRetries } -func (d *packetDeduper) Complete(key string, generation uint64) { - if d == nil || key == "" { - return +func backoffSleep(ctx context.Context, attempt int, base, max time.Duration) { + // Exponential backoff with jitter, capped to prevent overflow + // Cap attempt to prevent integer overflow in bit shift operation + // (500ms << 30 would overflow int64) + safeAttempt := attempt + if safeAttempt > 30 { + safeAttempt = 30 } - d.mu.Lock() - defer d.mu.Unlock() - if pendingGeneration, ok := d.pending[key]; !ok || pendingGeneration != generation { - return + d := base << safeAttempt + if d > max || d < 0 { // Also check for negative (overflow wrap-around) + d = max } - delete(d.pending, key) - if _, ok := d.completed[key]; ok { + jitter := time.Duration(int64(d) / 5) + sleep := d - jitter + time.Duration(randInt63n(int64(2*jitter))) + t := time.NewTimer(sleep) + defer t.Stop() + select { + case <-ctx.Done(): return - } - if d.maxEntries > 0 && len(d.order) >= d.maxEntries { - evicted := d.order[0] - d.order = d.order[1:] - delete(d.completed, evicted) - } - d.order = append(d.order, key) - d.completed[key] = struct{}{} -} - -func (d *packetDeduper) Release(key string, generation uint64) { - if d == nil || key == "" { + case <-t.C: return } - d.mu.Lock() - defer d.mu.Unlock() - if pendingGeneration, ok := d.pending[key]; ok && pendingGeneration == generation { - delete(d.pending, key) - } } -func dedupeKeyForPacket(req *transportpb.PublishRequest) string { - if req == nil { - return "" +// buildPacketFromMsg converts a StreamMessage to the gRPC DataPacket. +func buildPacketFromMsg(msg engram.StreamMessage) (*bobravozgrpcproto.DataPacket, error) { + payload := &structpb.Struct{} + if err := protojson.Unmarshal(msg.Payload, payload); err != nil { + return nil, fmt.Errorf("unmarshal payload: %w", err) } - if key := dedupeKeyForEnvelope(req.GetEnvelope()); key != "" { - return "env|" + key - } - if key := dedupeKeyForMessageID(req.GetMetadata()); key != "" { - return "msg|" + key + var inputs *structpb.Struct + if len(msg.Inputs) > 0 { + inputs = &structpb.Struct{} + if err := protojson.Unmarshal(msg.Inputs, inputs); err != nil { + return nil, fmt.Errorf("unmarshal inputs: %w", err) + } } - return "" + return &bobravozgrpcproto.DataPacket{ + Metadata: msg.Metadata, + Payload: payload, + Inputs: inputs, + }, nil } -func dedupeKeyForEnvelope(env *transportpb.StreamEnvelope) string { - if env == nil { - return "" - } - streamID := strings.TrimSpace(env.GetStreamId()) - if streamID == "" || env.GetSequence() == 0 { - return "" - } - return streamID + "|" + strings.TrimSpace(env.GetPartition()) + "|" + strconv.FormatUint(env.GetSequence(), 10) +// spawnBytesToMessageConverter launches a goroutine that converts []byte to StreamMessage. +func spawnBytesToMessageConverter(ctx context.Context, in <-chan []byte, inMsg chan<- engram.StreamMessage) { + go func() { + defer close(inMsg) + for { + select { + case <-ctx.Done(): + return + case data, ok := <-in: + if !ok { + return + } + msg := engram.StreamMessage{Metadata: map[string]string{}, Payload: data, Inputs: nil} + select { + case <-ctx.Done(): + return + case inMsg <- msg: + } + } + } + }() } -func dedupeKeyForMessageID(metadata map[string]string) string { //nolint:revive - if len(metadata) == 0 { - return "" - } - return strings.TrimSpace(metadata[metadataEnvelopeMessageIDKey]) +// spawnMessageToBytesConverter launches a goroutine that converts StreamMessage to []byte. +func spawnMessageToBytesConverter(ctx context.Context, outMsg <-chan engram.StreamMessage, out chan<- []byte) { + go func() { + defer close(out) + for { + select { + case <-ctx.Done(): + return + case msg, ok := <-outMsg: + if !ok { + return + } + select { + case <-ctx.Done(): + return + case out <- msg.Payload: + } + } + } + }() } -func shouldDropSubscribeMessage(msg engram.StreamMessage) (bool, string) { - if isMessageHeartbeat(msg) { - return true, "heartbeat" //nolint:goconst +// buildDialOptionsFromEnv constructs grpc.DialOptions including message sizes and TLS based on environment variables. +func buildDialOptionsFromEnv(ctx context.Context) ([]grpc.DialOption, error) { + var dialOpts []grpc.DialOption + maxRecv := getEnvBytes("BUBU_GRPC_CLIENT_MAX_RECV_BYTES", DefaultMaxMessageSize) + if maxRecv > 0 { + dialOpts = append(dialOpts, grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxRecv))) } - if isMessageNoop(msg) { - return true, "noop" //nolint:goconst + maxSend := getEnvBytes("BUBU_GRPC_CLIENT_MAX_SEND_BYTES", DefaultMaxMessageSize) + if maxSend > 0 { + dialOpts = append(dialOpts, grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(maxSend))) } - if isMessageEmpty(msg) { - return true, "empty" - } - return false, "" -} -func isMessageHeartbeat(msg engram.StreamMessage) bool { - if msg.Metadata != nil && strings.EqualFold(strings.TrimSpace(msg.Metadata["bubu-heartbeat"]), "true") { - return true + var tlsConf *tls.Config + if caFile := os.Getenv("BUBU_GRPC_CA_FILE"); caFile != "" { + tlsConf = &tls.Config{} + if pem, err := os.ReadFile(caFile); err == nil { + pool := x509.NewCertPool() + if pool.AppendCertsFromPEM(pem) { + tlsConf.RootCAs = pool + } else { + LoggerFromContext(ctx).Warn("Failed to append CA certs; falling back to system roots", "caFile", caFile) + } + } else { + LoggerFromContext(ctx).Warn("Failed to read BUBU_GRPC_CA_FILE; falling back to system roots", "error", err) + } } - return strings.EqualFold(strings.TrimSpace(msg.Kind), engram.StreamMessageKindHeartbeat) -} - -func isMessageNoop(msg engram.StreamMessage) bool { - return strings.EqualFold(strings.TrimSpace(msg.Kind), engram.StreamMessageKindNoop) + certFile := os.Getenv("BUBU_GRPC_CLIENT_CERT_FILE") + keyFile := os.Getenv("BUBU_GRPC_CLIENT_KEY_FILE") + if certFile != "" && keyFile != "" { + if tlsConf == nil { + tlsConf = &tls.Config{} + } + if cert, err := tls.LoadX509KeyPair(certFile, keyFile); err == nil { + tlsConf.Certificates = []tls.Certificate{cert} + } else { + LoggerFromContext(ctx).Warn("Failed to load client certificate; proceeding without mTLS", "error", err) + } + } + if tlsConf != nil { + dialOpts = append(dialOpts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConf))) + } else if os.Getenv("BUBU_GRPC_CLIENT_TLS") == "true" { + dialOpts = append(dialOpts, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{}))) + } else { + if os.Getenv("BUBU_GRPC_REQUIRE_TLS") == "true" { + return nil, fmt.Errorf( + "TLS required by BUBU_GRPC_REQUIRE_TLS; " + + "set BUBU_GRPC_CLIENT_TLS=true or provide BUBU_GRPC_CA_FILE/BUBU_GRPC_CLIENT_CERT_FILE", + ) + } + LoggerFromContext(ctx).Warn( + "gRPC client using insecure transport; " + + "set BUBU_GRPC_CLIENT_TLS=true or provide BUBU_GRPC_CA_FILE", + ) + dialOpts = append(dialOpts, grpc.WithTransportCredentials(insecure.NewCredentials())) + } + return dialOpts, nil } -func isMessageEmpty(msg engram.StreamMessage) bool { - if msg.Audio != nil && len(msg.Audio.PCM) > 0 { - return false - } - if msg.Video != nil && len(msg.Video.Payload) > 0 { - return false - } - if msg.Binary != nil && len(msg.Binary.Payload) > 0 { - return false - } - if len(msg.Payload) > 0 || len(msg.Inputs) > 0 { - return false +// parseReconnectSettingsFromEnv parses backoff and retry settings for reconnect loop. +func parseReconnectSettingsFromEnv() (time.Duration, time.Duration, int) { + maxBackoff := 30 * time.Second + if v := os.Getenv("BUBU_GRPC_RECONNECT_MAX_BACKOFF"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + maxBackoff = d + } } - if strings.TrimSpace(msg.Kind) != "" || strings.TrimSpace(msg.MessageID) != "" || !msg.Timestamp.IsZero() { - return false + baseBackoff := 500 * time.Millisecond + if v := os.Getenv("BUBU_GRPC_RECONNECT_BASE_BACKOFF"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + baseBackoff = d + } } - if len(msg.Transports) > 0 || msg.Envelope != nil { - return false + maxRetries := 10 + if v := os.Getenv("BUBU_GRPC_RECONNECT_MAX_RETRIES"); v != "" { + if i, err := strconv.Atoi(v); err == nil && i >= 0 { + maxRetries = i + } } - return true + return baseBackoff, maxBackoff, maxRetries } -func connectorDataSendLoop( - ctx context.Context, - stream transportpb.TransportConnectorService_DataClient, - out <-chan engram.StreamMessage, - opts streamRuntimeOptions, -) error { - streamCtx, cancel := context.WithCancel(ctx) - defer cancel() - - // Heartbeat ticker keeps the data stream alive when the engram has - // nothing to output (e.g. VAD detected no speech, STT has no transcript). - // Without this the connector's RecvWithTimeout fires DeadlineExceeded - // after MessageTimeout (default 30 s), tearing down all streams. - heartbeatInterval := opts.publishHeartbeatInterval - if heartbeatInterval <= 0 { - heartbeatInterval = defaultPublishHeartbeatInterval - } - ticker := time.NewTicker(heartbeatInterval) - defer ticker.Stop() - - for { - select { - case <-streamCtx.Done(): - bestEffortCloseSend(streamCtx, "data", stream.CloseSend) - // Best-effort drain any buffered messages without waiting for the - // producer to close the channel. Cancellation must not block - // forever when an engram ignores context shutdown. - drainStreamMessages(out) - return streamCtx.Err() - case msg, ok := <-out: - if !ok { - if err := stream.CloseSend(); err != nil { - return fmt.Errorf("data stream close send failed: %w", err) - } - return nil - } - injectTraceContext(ctx, &msg) - req, err := streamMessageToPublishRequest(msg) - if err != nil { - return err - } - dataReq := publishRequestToDataRequest(req) - if err := callSendWithTimeout(streamCtx, opts.messageTimeout, cancel, "data send", opts.sendTracker, func() error { - return stream.Send(dataReq) - }); err != nil { - return fmt.Errorf("data stream send failed: %w", err) - } - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - ticker.Reset(heartbeatInterval) - case <-ticker.C: - hb := &transportpb.DataRequest{ - Metadata: map[string]string{"bubu-heartbeat": "true"}, - } - if err := callSendWithTimeout(streamCtx, opts.messageTimeout, cancel, "data heartbeat", opts.sendTracker, func() error { //nolint:lll - return stream.Send(hb) - }); err != nil { - return fmt.Errorf("data heartbeat send failed: %w", err) - } - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() +// attachHubMetadataPairs collects standard hub metadata from the current execution context. +func attachHubMetadataPairs() []string { + if execCtxData, err := runtime.LoadExecutionContextData(); err == nil { + storyRunName := execCtxData.StoryInfo.StoryRunID + stepID := execCtxData.StoryInfo.StepName + ns := k8s.ResolvePodNamespace() + if storyRunName != "" && stepID != "" && ns != "" { + return []string{ + "storyrun-name", storyRunName, + "storyrun-namespace", ns, + "current-step-id", stepID, } } } + return nil } -func drainStreamMessages(out <-chan engram.StreamMessage) { +// heartbeatLoop periodically sends heartbeats on the stream; returns error when send fails. +func heartbeatLoop(ctx context.Context, stream bobravozgrpcproto.HubService_ProcessServer, sendMu *sync.Mutex) error { + interval := getHeartbeatInterval() + ticker := time.NewTicker(interval) + defer ticker.Stop() for { select { - case _, ok := <-out: - if !ok { - return + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + heartbeat := createHeartbeat() + select { + case <-ctx.Done(): + return ctx.Err() + default: + sendMu.Lock() + err := stream.Send(&bobravozgrpcproto.ProcessResponse{Packet: heartbeat}) + sendMu.Unlock() + if err != nil { + return fmt.Errorf("heartbeat send failed (connection dead): %w", err) + } } - default: - return } } } -func connectorControlLoop( //nolint:gocyclo - ctx context.Context, - client transportpb.TransportConnectorServiceClient, - ref bindingReference, - handler engram.ControlDirectiveHandler, - opts streamRuntimeOptions, +// readLoop reads from gRPC stream, filters heartbeats, forwards to input channel with backpressure, and monitors hangs. +func readLoop( + gctx context.Context, + stream bobravozgrpcproto.HubService_ProcessServer, + in chan<- engram.StreamMessage, ) error { - controlCtx, cancel := context.WithCancel(ctx) - defer cancel() - mdPairs := []string{ - coretransport.ProtocolMetadataKey, coretransport.ProtocolVersion, - } - if bindingID := controlBindingMetadataValue(ref); bindingID != "" { - mdPairs = append(mdPairs, controlBindingMetadataKey, bindingID) - } - controlCtx = metadata.NewOutgoingContext(controlCtx, metadata.Pairs(mdPairs...)) - stream, err := client.Control(controlCtx) - if err != nil { - return fmt.Errorf("control stream open failed: %w", err) - } - heartbeatInterval := opts.controlHeartbeatInterval - if heartbeatInterval <= 0 { - heartbeatInterval = defaultControlHeartbeatInterval - } - ticker := time.NewTicker(heartbeatInterval) - defer ticker.Stop() + defer close(in) - recvCh := startControlReceiver(controlCtx, stream) - controlRequests := opts.controlRequests + var lastRecvNano atomic.Int64 + lastRecvNano.Store(time.Now().UnixNano()) + hangTimeout := getHangTimeout() for { select { - case <-controlCtx.Done(): - bestEffortCloseSend(controlCtx, "control", stream.CloseSend) - return controlCtx.Err() - case req, ok := <-controlRequests: - if !ok { - controlRequests = nil - continue - } - if req == nil { - continue - } - if err := callSendWithTimeout(controlCtx, opts.messageTimeout, cancel, "control internal send", opts.sendTracker, func() error { //nolint:lll - return stream.Send(req) - }); err != nil { - return fmt.Errorf("control stream send failed: %w", err) - } - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - case msg, ok := <-recvCh: - if !ok { - if err := controlCtx.Err(); err != nil { - return err - } - return fmt.Errorf("control stream recv failed: %w", io.EOF) - } - if err := opts.startupHandshake.Observe(msg.response); err != nil { - return err - } - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } - if err := processControlMessage(controlCtx, stream, handler, msg, opts.messageTimeout, cancel, opts.sendTracker); err != nil { //nolint:lll - if err == io.EOF { - if cerr := controlCtx.Err(); cerr != nil { - return cerr - } - return fmt.Errorf("control stream recv failed: %w", err) - } - return err - } - case <-ticker.C: - if err := sendControlHeartbeat(controlCtx, stream, opts.messageTimeout, cancel, opts.sendTracker); err != nil { - return err - } - if opts.hangWatcher != nil { - opts.hangWatcher.Touch() - } + case <-gctx.Done(): + return gctx.Err() + default: } - } -} -func bestEffortCloseSend(ctx context.Context, streamName string, closeSend func() error) { - if closeSend == nil { - return - } - if err := closeSend(); err != nil { - LoggerFromContext(ctx).Debug("best-effort close send failed during cancellation", - "stream", streamName, - "error", err) - } -} - -type controlMessage struct { - response *transportpb.ControlResponse - err error -} + // Hang detection window + lastNano := lastRecvNano.Load() + if time.Since(time.Unix(0, lastNano)) > hangTimeout { + ago := time.Since(time.Unix(0, lastNano)) + return fmt.Errorf( + "connection hang detected: no messages for %v (last recv: %v ago)", + hangTimeout, + ago, + ) + } -type controlStartupHandshake struct { - mu sync.Mutex - doneCh chan struct{} - readySeen bool - capabilityRequired bool - startupCapabilitySeen bool - err error - once sync.Once -} + req, err := stream.Recv() + if err == io.EOF { + return nil + } + if err != nil { + return fmt.Errorf("error receiving from gRPC stream: %w", err) + } -func newControlStartupHandshake() *controlStartupHandshake { - return &controlStartupHandshake{doneCh: make(chan struct{})} -} + lastRecvNano.Store(time.Now().UnixNano()) + if isHeartbeat(req.Packet.Metadata) { + continue + } -func (h *controlStartupHandshake) Observe(resp *transportpb.ControlResponse) error { - if h == nil || resp == nil { - return nil - } - typ := normalizeControlType(protoControlActionToType(resp.GetAction(), resp.GetCustomAction())) - h.mu.Lock() - defer h.mu.Unlock() - switch typ { - case "connector.ready": //nolint:goconst - h.readySeen = true - mode, err := coretransport.NormalizeStartupCapabilitiesMode(resp.GetMetadata()[coretransport.StartupCapabilitiesMetadataKey]) //nolint:lll + msg, err := buildMessageFromPacket(req.Packet) if err != nil { - h.err = fmt.Errorf("invalid connector.ready startup metadata: %w", err) - h.finishLocked() - return h.err + return fmt.Errorf("error marshaling message from gRPC stream: %w", err) } - h.capabilityRequired = mode == coretransport.StartupCapabilitiesRequired - h.finishLocked() - case "connector.capabilities": //nolint:goconst - h.startupCapabilitySeen = true - h.finishLocked() - } - return nil -} + metrics.RecordStreamMessage(gctx, "received") -func (h *controlStartupHandshake) finishLocked() { - if h.err != nil { - h.once.Do(func() { - close(h.doneCh) - }) - return - } - if !h.readySeen { - return - } - if h.capabilityRequired && !h.startupCapabilitySeen { - return + if err := offerStreamMessageWithBackpressure(gctx, in, msg); err != nil { + return fmt.Errorf("timeout sending to handler: %w", err) + } } - h.once.Do(func() { - close(h.doneCh) - }) } -func (h *controlStartupHandshake) Wait(ctx context.Context, timeout time.Duration) error { - if h == nil { - return nil - } - if timeout <= 0 { - timeout = defaultMessageTimeout - } - timer := time.NewTimer(timeout) - defer timer.Stop() - select { - case <-h.doneCh: - h.mu.Lock() - defer h.mu.Unlock() - return h.err - case <-ctx.Done(): - return ctx.Err() - case <-timer.C: - return fmt.Errorf("%w: timeout=%s", errControlStartupHandshakeTimeout, timeout) +// writeLoop converts messages from out and sends them to gRPC stream with serialization and metrics. +func writeLoop( + gctx context.Context, + stream bobravozgrpcproto.HubService_ProcessServer, + out <-chan engram.StreamMessage, + sendMu *sync.Mutex, +) error { + for { + select { + case <-gctx.Done(): + return gctx.Err() + case msg, ok := <-out: + if !ok { + return nil + } + packet, err := buildPacketFromMsg(msg) + if err != nil { + return fmt.Errorf("error converting StreamMessage to DataPacket: %w", err) + } + metrics.RecordStreamMessage(gctx, "sent") + sendMu.Lock() + sendErr := stream.Send(&bobravozgrpcproto.ProcessResponse{Packet: packet}) + sendMu.Unlock() + if sendErr != nil { + return fmt.Errorf("error sending data to gRPC stream: %w", sendErr) + } + } } } -func startControlReceiver( - ctx context.Context, - stream transportpb.TransportConnectorService_ControlClient, -) <-chan controlMessage { - recvCh := make(chan controlMessage, 1) +// startServerDrain launches a background drain of remaining messages on server shutdown. +func startServerDrain( + gctx context.Context, + out <-chan engram.StreamMessage, + stream bobravozgrpcproto.HubService_ProcessServer, + sendMu *sync.Mutex, +) <-chan struct{} { + drainComplete := make(chan struct{}) + logger := LoggerFromContext(gctx) go func() { - defer close(recvCh) + defer close(drainComplete) + drainCount := 0 + drainTimer := time.NewTimer(5 * time.Second) + defer drainTimer.Stop() for { - // Control streams can be idle for long stretches. Do NOT enforce a - // recv timeout here; gRPC keepalive + optional hang watcher handle liveness. - response, recvErr := stream.Recv() - if recvErr != nil { - sendControlMessage(ctx, recvCh, controlMessage{err: recvErr}) - return - } - if !sendControlMessage(ctx, recvCh, controlMessage{response: response}) { + select { + case msg, ok := <-out: + if !ok { + logger.Info("Drain complete", "messagesDrained", drainCount) + return + } + packet, err := buildPacketFromMsg(msg) + if err != nil { + logger.Error("Failed to convert message during drain, dropping", "error", err) + continue + } + sendMu.Lock() + sendErr := stream.Send(&bobravozgrpcproto.ProcessResponse{Packet: packet}) + sendMu.Unlock() + if sendErr != nil { + logger.Error("Failed to send message during drain", "error", sendErr) + return + } + drainCount++ + case <-drainTimer.C: + logger.Warn("Drain timeout exceeded, aborting", "drainedCount", drainCount) return } } }() - return recvCh + return drainComplete } -func sendControlMessage(ctx context.Context, recvCh chan<- controlMessage, msg controlMessage) bool { +// waitForDrainWithGrace waits for drain completion with an additional grace period. +func waitForDrainWithGrace( + gctx context.Context, + drainComplete <-chan struct{}, + grace time.Duration, + reason string, +) { + logger := LoggerFromContext(gctx) select { - case recvCh <- msg: - return true - case <-ctx.Done(): - return false + case <-drainComplete: + logger.Info("Graceful shutdown complete", "reason", reason) + case <-time.After(grace): + logger.Warn("Drain did not complete within grace period", "grace", grace, "reason", reason) } } -func resolveControlHeartbeatInterval(env envResolver) time.Duration { - if d := parsePositiveDuration(env.lookup(contracts.TransportHeartbeatIntervalEnv)); d > 0 { - return d - } - return defaultControlHeartbeatInterval -} +// drainOnShutdown coordinates graceful drain after context cancellation. +func drainOnShutdown( + gctx context.Context, + out <-chan engram.StreamMessage, + stream bobravozgrpcproto.HubService_ProcessServer, + sendMu *sync.Mutex, + shutdownTimeout time.Duration, + handlerDone <-chan struct{}, +) error { + select { + case <-gctx.Done(): + LoggerFromContext(gctx).Info("Graceful shutdown initiated, draining stream") + drainComplete := startServerDrain(gctx, out, stream, sendMu) -func resolvePublishHeartbeatInterval(env envResolver, messageTimeout time.Duration) time.Duration { - interval := defaultPublishHeartbeatInterval - if d := parsePositiveDuration(env.lookup(contracts.GRPCHeartbeatIntervalEnv)); d > 0 { - interval = d - } - if messageTimeout > 0 && interval >= messageTimeout { - clamped := messageTimeout - time.Nanosecond - if clamped <= 0 { - clamped = time.Nanosecond + handlerTimer := time.NewTimer(shutdownTimeout) + defer handlerTimer.Stop() + + select { + case <-drainComplete: + LoggerFromContext(gctx).Info("Graceful shutdown complete (drain finished)") + return nil + case <-handlerDone: + LoggerFromContext(gctx).Info("User handler completed, waiting for drain") + waitForDrainWithGrace(gctx, drainComplete, 5*time.Second, "handler exit") + return nil + case <-handlerTimer.C: + LoggerFromContext(gctx).Warn( + "User handler did not complete within timeout, waiting for drain", + "timeout", shutdownTimeout, + ) + waitForDrainWithGrace(gctx, drainComplete, 5*time.Second, "handler timeout") + return nil } - slog.Default().Warn("clamping publish heartbeat interval below message timeout", - "heartbeatInterval", interval, - "messageTimeout", messageTimeout, - "clamped", clamped, - "key", contracts.GRPCHeartbeatIntervalEnv, - ) - return clamped + case <-handlerDone: + return nil } - return interval -} - -func resolveChannelBufferSize(env envResolver) int { - return transportconnector.ChannelBufferSize(env, transportconnector.DefaultChannelBufferSize) -} - -func resolveMessageTimeout(env envResolver) time.Duration { - return transportconnector.MessageTimeout(env, defaultMessageTimeout) -} - -func resolveChannelSendTimeout(env envResolver) time.Duration { - return transportconnector.ChannelSendTimeout(env) } -func resolveHangTimeout(env envResolver) time.Duration { - return transportconnector.HangTimeout(env) +// randInt63n is a concurrency-safe pseudo-random helper for backoff jitter. +// Uses math/rand with a mutex-protected source to ensure proper randomness +// even when multiple goroutines reconnect simultaneously. +func randInt63n(n int64) int64 { + if n <= 0 { + return 0 + } + rngMu.Lock() + defer rngMu.Unlock() + return rng.Int63n(n) } - -func processControlMessage( - ctx context.Context, - stream transportpb.TransportConnectorService_ControlClient, - handler engram.ControlDirectiveHandler, - msg controlMessage, - timeout time.Duration, - cancel context.CancelFunc, - tracker *timedSendTracker, -) error { - if msg.err != nil { - if msg.err == io.EOF { - return io.EOF - } - return fmt.Errorf("control stream recv failed: %w", msg.err) - } - - response, handleErr := handler.HandleControlDirective(ctx, protoToControlDirective(msg.response)) - if handleErr != nil { - response = &engram.ControlDirective{ - Type: "error", - Metadata: map[string]string{ - "message": handleErr.Error(), - }, - } - } - if response == nil { - return nil - } - if err := callSendWithTimeout(ctx, timeout, cancel, "control send", tracker, func() error { - return stream.Send(controlDirectiveToProto(response)) - }); err != nil { - return fmt.Errorf("control stream send failed: %w", err) - } - return nil -} - -func sendControlHeartbeat( - ctx context.Context, - stream transportpb.TransportConnectorService_ControlClient, - timeout time.Duration, - cancel context.CancelFunc, - tracker *timedSendTracker, -) error { - if err := callSendWithTimeout(ctx, timeout, cancel, "control heartbeat", tracker, func() error { - return stream.Send(connectorHeartbeatDirective()) - }); err != nil { - return fmt.Errorf("control stream heartbeat failed: %w", err) - } - return nil -} - -func connectorHeartbeatDirective() *transportpb.ControlRequest { - return controlDirectiveToProto(&engram.ControlDirective{ - Type: "heartbeat", - Metadata: map[string]string{ - "ts": strconv.FormatInt(time.Now().UnixMilli(), 10), - }, - }) -} - -func controlDirectiveHandler[C any](e engram.StreamingEngram[C]) engram.ControlDirectiveHandler { - if handler, ok := any(e).(engram.ControlDirectiveHandler); ok { - return handler - } - return defaultControlDirectiveHandler{} -} - -type defaultControlDirectiveHandler struct{} - -func (defaultControlDirectiveHandler) HandleControlDirective( - ctx context.Context, - directive engram.ControlDirective, -) (*engram.ControlDirective, error) { - logger := LoggerFromContext(ctx) - typ := strings.ToLower(strings.TrimSpace(directive.Type)) - switch typ { - case "", "noop": - logger.Debug("Control directive noop; acknowledging", "type", directive.Type) - return ackControlDirective(directive.Type, false, "noop"), nil - case "connector.ready": - logger.Info("Connector startup ready received", "metadata", directive.Metadata) - return ackControlDirective(directive.Type, true, "startup"), nil - case "connector.capabilities": - logger.Info("Connector capability update received", "metadata", directive.Metadata) - return ackControlDirective(directive.Type, true, "capabilities"), nil - case "handoff.draining", "handoff.cutover", "handoff.ready": //nolint:goconst - logger.Info("Handoff directive received", "type", directive.Type, "metadata", directive.Metadata) - return ackControlDirective(directive.Type, true, "handoff"), nil - case "ack": //nolint:goconst - logger.Debug("Control acknowledgement received", "metadata", directive.Metadata) - return nil, nil - default: - logger.Debug("Control directive received with no handler registered; acknowledging as noop", "type", directive.Type) - return ackControlDirective(directive.Type, false, "no_control_handler"), nil - } -} - -func ackControlDirective(originalType string, handled bool, reason string) *engram.ControlDirective { - return &engram.ControlDirective{ - Type: "ack", - Metadata: map[string]string{ - "handled": strconv.FormatBool(handled), - "reason": reason, - "type": strings.TrimSpace(originalType), - }, - } -} - -func normalizeControlType(val string) string { - typ := strings.ToLower(strings.TrimSpace(val)) - if typ == "" { - return "unknown" - } - return typ -} - -func protoToControlDirective(d *transportpb.ControlResponse) engram.ControlDirective { - if d == nil { - return engram.ControlDirective{} - } - return engram.ControlDirective{ - Type: protoControlActionToType(d.GetAction(), d.GetCustomAction()), - Metadata: cloneStringMap(d.GetMetadata()), - } -} - -func controlDirectiveToProto(d *engram.ControlDirective) *transportpb.ControlRequest { - if d == nil { - return nil - } - action, custom := controlTypeToProto(d.Type) - return &transportpb.ControlRequest{ - Action: action, - CustomAction: custom, - Metadata: cloneStringMap(d.Metadata), - } -} - -func controlTypeToProto(typ string) (transportpb.ControlAction, string) { //nolint:gocyclo - switch normalizeControlType(typ) { - case "", "noop": - return transportpb.ControlAction_CONTROL_ACTION_NOOP, "" - case "start": - return transportpb.ControlAction_CONTROL_ACTION_START, "" - case "stop": - return transportpb.ControlAction_CONTROL_ACTION_STOP, "" - case "start-upstream": - return transportpb.ControlAction_CONTROL_ACTION_START_UPSTREAM, "" - case "stop-upstream": - return transportpb.ControlAction_CONTROL_ACTION_STOP_UPSTREAM, "" - case "start-downstream": - return transportpb.ControlAction_CONTROL_ACTION_START_DOWNSTREAM, "" - case "stop-downstream": - return transportpb.ControlAction_CONTROL_ACTION_STOP_DOWNSTREAM, "" - case "heartbeat": - return transportpb.ControlAction_CONTROL_ACTION_HEARTBEAT, "" - case "connector.ready": - return transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY, "" - case "connector.capabilities": - return transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_CAPABILITIES, "" - case "handoff.draining": - return transportpb.ControlAction_CONTROL_ACTION_HANDOFF_DRAINING, "" - case "handoff.cutover": - return transportpb.ControlAction_CONTROL_ACTION_HANDOFF_CUTOVER, "" - case "handoff.ready": - return transportpb.ControlAction_CONTROL_ACTION_HANDOFF_READY, "" - case "ack": - return transportpb.ControlAction_CONTROL_ACTION_ACK, "" - case "error": - return transportpb.ControlAction_CONTROL_ACTION_ERROR, "" - case "codec-select": - return transportpb.ControlAction_CONTROL_ACTION_CODEC_SELECT, "" - default: - return transportpb.ControlAction_CONTROL_ACTION_UNSPECIFIED, strings.TrimSpace(typ) - } -} - -func protoControlActionToType(action transportpb.ControlAction, custom string) string { //nolint:gocyclo - if strings.TrimSpace(custom) != "" { - return strings.TrimSpace(custom) - } - switch action { - case transportpb.ControlAction_CONTROL_ACTION_NOOP: - return "noop" - case transportpb.ControlAction_CONTROL_ACTION_START: - return "start" - case transportpb.ControlAction_CONTROL_ACTION_STOP: - return "stop" - case transportpb.ControlAction_CONTROL_ACTION_START_UPSTREAM: - return "start-upstream" - case transportpb.ControlAction_CONTROL_ACTION_STOP_UPSTREAM: - return "stop-upstream" - case transportpb.ControlAction_CONTROL_ACTION_START_DOWNSTREAM: - return "start-downstream" - case transportpb.ControlAction_CONTROL_ACTION_STOP_DOWNSTREAM: - return "stop-downstream" - case transportpb.ControlAction_CONTROL_ACTION_HEARTBEAT: - return "heartbeat" - case transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY: - return "connector.ready" - case transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_CAPABILITIES: - return "connector.capabilities" - case transportpb.ControlAction_CONTROL_ACTION_HANDOFF_DRAINING: - return "handoff.draining" - case transportpb.ControlAction_CONTROL_ACTION_HANDOFF_CUTOVER: - return "handoff.cutover" - case transportpb.ControlAction_CONTROL_ACTION_HANDOFF_READY: - return "handoff.ready" - case transportpb.ControlAction_CONTROL_ACTION_ACK: - return "ack" - case transportpb.ControlAction_CONTROL_ACTION_ERROR: - return "error" - case transportpb.ControlAction_CONTROL_ACTION_CODEC_SELECT: - return "codec-select" - default: - return "" - } -} - -func shouldSubscribeBinary(ref bindingReference) bool { - if ref.Info == nil { - return true - } - if len(ref.Info.GetAudioCodecs()) == 0 && len(ref.Info.GetVideoCodecs()) == 0 && len(ref.Info.GetBinaryTypes()) == 0 { - return true - } - return len(ref.Info.GetBinaryTypes()) > 0 -} - -func streamMessageToPublishRequest(msg engram.StreamMessage) (*transportpb.PublishRequest, error) { //nolint:gocyclo - if err := msg.Validate(); err != nil { - return nil, err - } - if msg.Audio != nil { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Audio{ - Audio: &transportpb.AudioFrame{ - Pcm: append([]byte(nil), msg.Audio.PCM...), - SampleRateHz: msg.Audio.SampleRateHz, - Channels: msg.Audio.Channels, - Codec: strings.TrimSpace(msg.Audio.Codec), - TimestampMs: durationToMillis(msg.Audio.Timestamp), - }, - }, - } - if err := applyStreamContextToPublishRequest(req, msg); err != nil { - return nil, err - } - return req, nil - } - if msg.Video != nil { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Video{ - Video: &transportpb.VideoFrame{ - Payload: append([]byte(nil), msg.Video.Payload...), - Codec: strings.TrimSpace(msg.Video.Codec), - Width: msg.Video.Width, - Height: msg.Video.Height, - TimestampMs: durationToMillis(msg.Video.Timestamp), - Raw: msg.Video.Raw, - }, - }, - } - if err := applyStreamContextToPublishRequest(req, msg); err != nil { - return nil, err - } - return req, nil - } - if !shouldBypassEnvelopeForRawBinary(msg) { - env := streamMessageEnvelope(msg) - if env != nil && (env.Kind != "" || len(env.Payload) > 0 || len(env.Inputs) > 0 || len(env.Transports) > 0) { - frame, err := envelope.ToBinaryFrame(env) - if err != nil { - return nil, fmt.Errorf("envelope marshal failed: %w", err) - } - if msg.Binary != nil { - frame.TimestampMs = durationToMillis(msg.Binary.Timestamp) - } - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{Binary: frame}, - } - if err := applyStreamContextToPublishRequest(req, msg); err != nil { - return nil, err - } - return req, nil - } - } - if msg.Binary != nil { - mimeType := strings.TrimSpace(msg.Binary.MimeType) - if mimeType == "" { - mimeType = octetStreamCodec - } - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: copyBytes(msg.Binary.Payload), - MimeType: mimeType, - TimestampMs: durationToMillis(msg.Binary.Timestamp), - }, - }, - } - if err := applyStreamContextToPublishRequest(req, msg); err != nil { - return nil, err - } - return req, nil - } - return nil, fmt.Errorf("stream message missing audio, video, or binary payload") -} - -func shouldBypassEnvelopeForRawBinary(msg engram.StreamMessage) bool { - if msg.Binary == nil { - return false - } - if isReservedEnvelopeMimeType(msg.Binary.MimeType) { - return false - } - if len(msg.Inputs) > 0 || len(msg.Transports) > 0 || msg.Envelope != nil { - return false - } - if len(msg.Payload) == 0 { - return true - } - return bytes.Equal(msg.Payload, msg.Binary.Payload) -} - -func isReservedEnvelopeMimeType(mimeType string) bool { - canonical, ok := canonicalBinaryMimeType(mimeType) - return ok && canonical == envelope.MIMEType -} - -func canonicalBinaryMimeType(mimeType string) (string, bool) { - mimeType = strings.TrimSpace(mimeType) - if mimeType == "" { - return "", false - } - parsed, _, err := mime.ParseMediaType(mimeType) - if err != nil { - return mimeType, false - } - return strings.ToLower(parsed), true -} - -func publishRequestToStreamMessage(req *transportpb.PublishRequest) (engram.StreamMessage, error) { - var msg engram.StreamMessage - switch frame := req.GetFrame().(type) { - case *transportpb.PublishRequest_Audio: - audio := frame.Audio - if audio == nil { - return engram.StreamMessage{}, fmt.Errorf("audio frame missing payload") - } - payload := append([]byte(nil), audio.GetPcm()...) - msg = engram.StreamMessage{ - Audio: &engram.AudioFrame{ - PCM: payload, - SampleRateHz: audio.GetSampleRateHz(), - Channels: audio.GetChannels(), - Codec: audio.GetCodec(), - Timestamp: durationFromMillis(audio.GetTimestampMs()), - }, - } - case *transportpb.PublishRequest_Video: - video := frame.Video - if video == nil { - return engram.StreamMessage{}, fmt.Errorf("video frame missing payload") - } - msg = engram.StreamMessage{ - Video: &engram.VideoFrame{ - Payload: append([]byte(nil), video.GetPayload()...), - Codec: video.GetCodec(), - Width: video.GetWidth(), - Height: video.GetHeight(), - Timestamp: durationFromMillis(video.GetTimestampMs()), - Raw: video.GetRaw(), - }, - } - case *transportpb.PublishRequest_Binary: - binary := frame.Binary - if binary == nil { - return engram.StreamMessage{}, fmt.Errorf("binary frame missing payload") - } - if isReservedEnvelopeMimeType(binary.GetMimeType()) { - env, err := envelope.FromBinaryFrame(&transportpb.BinaryFrame{ - Payload: binary.GetPayload(), - MimeType: envelope.MIMEType, - TimestampMs: binary.GetTimestampMs(), - }) - if err != nil { - return engram.StreamMessage{}, fmt.Errorf("envelope decode failed: %w", err) - } - populateMessageFromEnvelope(&msg, env) - if err := mergeRequestContextIntoStreamMessage(req, &msg); err != nil { - return engram.StreamMessage{}, err - } - if err := msg.Validate(); err != nil { - return engram.StreamMessage{}, fmt.Errorf("decoded stream message invalid: %w", err) - } - return msg, nil - } - payload := append([]byte(nil), binary.GetPayload()...) - mimeType := strings.TrimSpace(binary.GetMimeType()) - if mimeType == "" { - mimeType = octetStreamCodec - } - msg = engram.StreamMessage{ - Payload: payload, - Binary: &engram.BinaryFrame{ - // Keep payload and binary payload mirrored without a second full copy - // on the binary passthrough hot path. - Payload: payload, - MimeType: mimeType, - Timestamp: durationFromMillis(binary.GetTimestampMs()), - }, - } - default: - return engram.StreamMessage{}, fmt.Errorf("publish request missing frame payload") - } - if err := mergeRequestContextIntoStreamMessage(req, &msg); err != nil { - return engram.StreamMessage{}, err - } - if err := msg.Validate(); err != nil { - return engram.StreamMessage{}, fmt.Errorf("decoded stream message invalid: %w", err) - } - return msg, nil -} - -func durationToMillis(d time.Duration) uint64 { - if d <= 0 { - return 0 - } - return uint64(d / time.Millisecond) -} - -func durationFromMillis(ms uint64) time.Duration { - return time.Duration(ms) * time.Millisecond -} - -func enqueueStreamMessage( - ctx context.Context, - dst chan<- engram.InboundMessage, - msg engram.InboundMessage, - timeout time.Duration, -) (bool, error) { - if timeout <= 0 { - select { - case <-ctx.Done(): - return false, ctx.Err() - case dst <- msg: - return true, nil - } - } - timer := time.NewTimer(timeout) - defer timer.Stop() - select { - case <-ctx.Done(): - return false, ctx.Err() - case dst <- msg: - return true, nil - case <-timer.C: - // channelSendTimeout > 0 is a lossy configuration: drop the message and - // return a non-delivered result so the caller can release any pending - // dedupe state without emitting downstream delivery receipts. - LoggerFromContext(ctx).Warn("stream channel send timeout; dropping message", - "timeout", timeout) - return false, nil - } -} - -func callSendWithTimeout( - ctx context.Context, - timeout time.Duration, - cancel context.CancelFunc, - opName string, - tracker *timedSendTracker, - fn func() error, -) error { - select { - case <-ctx.Done(): - return ctx.Err() - default: - } - errCh := make(chan error, 1) - if err := startTimedSend(tracker, func() { - err := fn() - // Non-blocking send: if the caller already timed out and nobody reads - // errCh, the buffered channel absorbs this single write and the - // goroutine exits immediately instead of leaking. - select { - case errCh <- err: - default: - } - }); err != nil { - return fmt.Errorf("%s rejected: %w", opName, err) - } - if timeout <= 0 { - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-errCh: - return err - } - } - timer := time.NewTimer(timeout) - defer timer.Stop() - select { - case <-ctx.Done(): - return ctx.Err() - case err := <-errCh: - return err - case <-timer.C: - if cancel != nil { - cancel() - } - return fmt.Errorf("%s timed out after %s", opName, timeout) - } -} - -const maxTimedSendGoroutines = 16 - -type timedSendTracker struct { - mu sync.Mutex - active int - done chan struct{} -} - -func newTimedSendTracker() *timedSendTracker { - done := make(chan struct{}) - close(done) - return &timedSendTracker{done: done} -} - -func startTimedSend(tracker *timedSendTracker, fn func()) error { - if fn == nil { - return nil - } - if tracker == nil { - go fn() - return nil - } - tracker.mu.Lock() - if tracker.active >= maxTimedSendGoroutines { - tracker.mu.Unlock() - return fmt.Errorf( - "too many in-flight send goroutines (%d)", - maxTimedSendGoroutines, - ) - } - tracker.mu.Unlock() - tracker.begin() - go func() { - defer tracker.end() - fn() - }() - return nil -} - -func (t *timedSendTracker) Wait(timeout time.Duration) bool { - if t == nil { - return true - } - done := t.currentDone() - if timeout <= 0 { - <-done - return true - } - timer := time.NewTimer(timeout) - defer timer.Stop() - select { - case <-done: - return true - case <-timer.C: - return false - } -} - -func (t *timedSendTracker) begin() { - t.mu.Lock() - defer t.mu.Unlock() - if t.active == 0 { - t.done = make(chan struct{}) - } - t.active++ -} - -func (t *timedSendTracker) end() { - t.mu.Lock() - defer t.mu.Unlock() - if t.active <= 0 { - return - } - t.active-- - if t.active == 0 { - close(t.done) - } -} - -func (t *timedSendTracker) currentDone() <-chan struct{} { - t.mu.Lock() - defer t.mu.Unlock() - if t.done == nil { - done := make(chan struct{}) - close(done) - t.done = done - } - return t.done -} - -func waitForTimedSendCleanup(logger *slog.Logger, tracker *timedSendTracker, timeout time.Duration) error { - if tracker == nil { - return nil - } - if tracker.Wait(timeout) { - return nil - } - if logger != nil { - logger.Warn("Timed send worker cleanup exceeded timeout", "timeout", timeout) - } - return fmt.Errorf("%w: timeout=%s", errTimedSendCleanupTimeout, timeout) -} - -func nextReconnectBackoff(current time.Duration, max time.Duration) time.Duration { - backoff := current - if backoff <= 0 { - backoff = time.Second - } - if max > 0 { - backoff *= 2 - if backoff > max { - return max - } - return backoff - } - if backoff < defaultReconnectMaxBackoff { - backoff *= 2 - if backoff > defaultReconnectMaxBackoff { - return defaultReconnectMaxBackoff - } - } - return backoff -} - -func jitterReconnectDelay(wait time.Duration, max time.Duration) time.Duration { - if wait <= 0 { - return 0 - } - jittered := time.Duration(float64(wait) * (reconnectJitterMinMultiplier + reconnectJitterFloat64()*reconnectJitterSpanMultiplier)) //nolint:lll - if jittered <= 0 { - jittered = time.Millisecond - } - if max > 0 && jittered > max { - return max - } - return jittered -} - -func sleepWithContext(ctx context.Context, wait time.Duration) error { - if wait <= 0 { - return nil - } - timer := time.NewTimer(wait) - defer timer.Stop() - select { - case <-ctx.Done(): - return ctx.Err() - case <-timer.C: - return nil - } -} - -func isRetriableTransportSessionError(err error) bool { - if err == nil { - return false - } - if errors.Is(err, errTimedSendCleanupTimeout) { - return false - } - if errors.Is(err, errControlStartupHandshakeTimeout) { - return false - } - if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { - return false - } - switch status.Code(err) { - case codes.InvalidArgument, codes.PermissionDenied, codes.Unauthenticated, codes.Unimplemented, codes.FailedPrecondition: //nolint:lll - return false - } - msg := strings.ToLower(err.Error()) - for _, marker := range []string{ - "transport binding missing", - "transport connector endpoint empty", - "transport binding missing endpoint", - "protocol mismatch", - } { - if strings.Contains(msg, marker) { - return false - } - } - return true -} - -type hangWatcher struct { - timeout time.Duration - cancel context.CancelFunc - resetCh chan struct{} - stopCh chan struct{} - once sync.Once - logger *slog.Logger -} - -func newHangWatcher(ctx context.Context, timeout time.Duration, cancel context.CancelFunc) *hangWatcher { - if timeout <= 0 || cancel == nil { - return nil - } - hw := &hangWatcher{ - timeout: timeout, - cancel: cancel, - resetCh: make(chan struct{}, 1), - stopCh: make(chan struct{}), - logger: LoggerFromContext(ctx), - } - go hw.loop() - return hw -} - -func (w *hangWatcher) loop() { - timer := time.NewTimer(w.timeout) - defer timer.Stop() - for { - select { - case <-timer.C: - if w.logger != nil { - w.logger.Error("Transport hang timeout triggered", "timeout", w.timeout) - } - w.cancel() - return - case <-w.resetCh: - if !timer.Stop() { - select { - case <-timer.C: - default: - } - } - timer.Reset(w.timeout) - case <-w.stopCh: - if !timer.Stop() { - select { - case <-timer.C: - default: - } - } - return - } - } -} - -func (w *hangWatcher) Touch() { - if w == nil { - return - } - // Drain any pending signal so the next send always succeeds. - // This prevents burst Touch() calls from silently dropping resets. - select { - case <-w.resetCh: - default: - } - select { - case <-w.stopCh: - case w.resetCh <- struct{}{}: - default: - } -} - -func (w *hangWatcher) Stop() { - if w == nil { - return - } - w.once.Do(func() { - close(w.stopCh) - }) -} - -func loadStreamingExecutionContext[C any](ctx context.Context) (C, *engram.Secrets, error) { - var zeroConfig C - - execCtxData, err := runtime.LoadExecutionContextData() - if err != nil { - return zeroConfig, nil, fmt.Errorf("failed to load execution context: %w", err) - } - logExecutionContextDebug(LoggerFromContext(ctx), execCtxData) - - sm, err := storage.SharedManager(ctx) - if err != nil { - return zeroConfig, nil, fmt.Errorf("failed to create storage manager: %w", err) - } - configMap, err := hydrateConfig(ctx, sm, execCtxData.Config, execCtxData.CELContext) - if err != nil { - return zeroConfig, nil, fmt.Errorf("failed to hydrate config: %w", err) - } - config, err := runtime.UnmarshalFromMap[C](configMap) - if err != nil { - return zeroConfig, nil, fmt.Errorf("failed to unmarshal config: %w", err) - } - - secrets, err := engram.NewSecretsWithError(ctx, execCtxData.Secrets) - if err != nil { - return zeroConfig, nil, fmt.Errorf("failed to expand secrets: %w", err) - } - return config, secrets, nil -} - -func normalizedDriver(ref bindingReference) string { - if ref.Info == nil { - return "" - } - return strings.ToLower(strings.TrimSpace(ref.Info.GetDriver())) -} - -func controlBindingMetadataValue(ref bindingReference) string { - name := strings.TrimSpace(ref.Name) - if name == "" { - return "" - } - namespace := strings.TrimSpace(ref.Namespace) - if namespace == "" { - return name - } - return namespace + "/" + name -} - -// publishRequestToDataRequest converts the internal PublishRequest representation -// into a DataRequest for the Data bidirectional stream. -func publishRequestToDataRequest(req *transportpb.PublishRequest) *transportpb.DataRequest { - if req == nil { - return nil - } - dr := &transportpb.DataRequest{ - Metadata: req.GetMetadata(), - Payload: req.GetPayload(), - Inputs: req.GetInputs(), - Transports: req.GetTransports(), - Envelope: req.GetEnvelope(), - } - switch f := req.GetFrame().(type) { - case *transportpb.PublishRequest_Audio: - dr.Frame = &transportpb.DataRequest_Audio{Audio: f.Audio} - case *transportpb.PublishRequest_Video: - dr.Frame = &transportpb.DataRequest_Video{Video: f.Video} - case *transportpb.PublishRequest_Binary: - dr.Frame = &transportpb.DataRequest_Binary{Binary: f.Binary} - } - return dr -} - -// dataResponseToPublishRequest converts a DataResponse from the Data stream -// into a PublishRequest so the existing chunk reassembler and message -// translation pipeline can be reused without modification. -func dataResponseToPublishRequest(resp *transportpb.DataResponse) *transportpb.PublishRequest { - if resp == nil { - return nil - } - pr := &transportpb.PublishRequest{ - Metadata: resp.GetMetadata(), - Payload: resp.GetPayload(), - Inputs: resp.GetInputs(), - Transports: resp.GetTransports(), - Envelope: resp.GetEnvelope(), - } - switch f := resp.GetFrame().(type) { - case *transportpb.DataResponse_Audio: - pr.Frame = &transportpb.PublishRequest_Audio{Audio: f.Audio} - case *transportpb.DataResponse_Video: - pr.Frame = &transportpb.PublishRequest_Video{Video: f.Video} - case *transportpb.DataResponse_Binary: - pr.Frame = &transportpb.PublishRequest_Binary{Binary: f.Binary} - } - return pr -} - -// newDataChunkReassembler is an alias for newPublishChunkReassembler. -// The reassembler operates on PublishRequest (the internal representation) -// regardless of the wire type. -var newDataChunkReassembler = newPublishChunkReassembler diff --git a/stream_buffer_test.go b/stream_buffer_test.go new file mode 100644 index 0000000..7b5aecb --- /dev/null +++ b/stream_buffer_test.go @@ -0,0 +1,108 @@ +package sdk + +import ( + "context" + "fmt" + "testing" + + bobravozgrpcproto "github.com/bubustack/bobravoz-grpc/proto/v1" + "google.golang.org/protobuf/types/known/structpb" +) + +// makePacket is kept for potential future use in tests; currently unused. +// nolint:unused +func makePacket(meta string, sizeHint int) *bobravozgrpcproto.DataPacket { + // Use sizeHint only to vary payload size approximately + payload := map[string]any{"k": meta} + for i := 0; i < sizeHint; i++ { + payload["p"+string(rune('a'+(i%26)))] = i + } + // Build a minimal Struct; tests do not rely on exact wire size + s, _ := structpb.NewStruct(payload) + return &bobravozgrpcproto.DataPacket{ + Metadata: map[string]string{"id": meta}, + Payload: s, + } +} + +// Test that oversize messages are dropped by client buffer +func TestClientBufferAddOversizeDrops(t *testing.T) { + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_MESSAGES", "10") + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_BYTES", "64") // tiny budget + b := newClientMessageBuffer() + + bigStr := make([]byte, 128) + s, _ := structpb.NewStruct(map[string]any{"big": string(bigStr)}) + pkt := &bobravozgrpcproto.DataPacket{Payload: s} + if added := b.add(pkt); added { + t.Fatalf("expected oversize drop, got added") + } +} + +// Test that overflow is dropped when limits reached +func TestClientBufferOverflowDrops(t *testing.T) { + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_MESSAGES", "2") + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_BYTES", "1024") + b := newClientMessageBuffer() + + p1 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "1"}} + p2 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "2"}} + if !b.add(p1) || !b.add(p2) { + t.Fatalf("expected first two adds to succeed") + } + p3 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "3"}} + if b.add(p3) { + t.Fatalf("expected overflow drop on third add") + } +} + +// Test flush order and that buffer is emptied on success +func TestClientBufferFlushOrder(t *testing.T) { + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_MESSAGES", "10") + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_BYTES", "1048576") + b := newClientMessageBuffer() + + p1 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "1"}} + p2 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "2"}} + if !b.add(p1) || !b.add(p2) { + t.Fatalf("expected adds to succeed") + } + + var sent []*bobravozgrpcproto.DataPacket + flushed := b.flush(context.Background(), func(p *bobravozgrpcproto.DataPacket) error { + sent = append(sent, p) + return nil + }) + if flushed != 2 { + t.Fatalf("expected flushed=2, got %d", flushed) + } + if len(sent) != 2 || sent[0] != p1 || sent[1] != p2 { + t.Fatalf("unexpected send order: %+v", sent) + } +} + +// Test flush stops on error and retains remaining messages +func TestClientBufferFlushStopsOnError(t *testing.T) { + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_MESSAGES", "10") + t.Setenv("BUBU_GRPC_CLIENT_BUFFER_MAX_BYTES", "1048576") + b := newClientMessageBuffer() + + p1 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "1"}} + p2 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "2"}} + p3 := &bobravozgrpcproto.DataPacket{Metadata: map[string]string{"i": "3"}} + _ = b.add(p1) + _ = b.add(p2) + _ = b.add(p3) + + count := 0 + flushed := b.flush(context.Background(), func(p *bobravozgrpcproto.DataPacket) error { + count++ + if count == 2 { + return fmt.Errorf("send error") + } + return nil + }) + if flushed != 1 { + t.Fatalf("expected flushed=1 before error, got %d", flushed) + } +} diff --git a/stream_chunks.go b/stream_chunks.go deleted file mode 100644 index 64f3ab3..0000000 --- a/stream_chunks.go +++ /dev/null @@ -1,294 +0,0 @@ -package sdk - -import ( - "fmt" - "strings" - "sync" - "time" - - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "google.golang.org/protobuf/proto" -) - -const ( - defaultChunkReassemblyTTL = 2 * time.Minute - defaultChunkMaxInFlightChunks = 256 - defaultChunkMaxAssemblyChunks = 256 - defaultChunkMaxInFlightBytes = 64 * 1024 * 1024 - defaultChunkMaxChunkBytes = DefaultMaxMessageSize -) - -type publishChunkReassembler struct { - mu sync.Mutex - ttl time.Duration - sweepInterval time.Duration - maxChunks int - maxBytes int - maxChunkBytes int - maxAssembly int - inflightBytes int - assemblies map[string]*publishChunkAssembly - stopCh chan struct{} - doneCh chan struct{} -} - -type publishChunkAssembly struct { - lastSeen time.Time - expected uint32 - received uint32 - totalBytes uint32 - base *transportpb.PublishRequest - chunks [][]byte - receivedByte int -} - -func newPublishChunkReassembler(ttl time.Duration, maxChunks int, maxBytes int) *publishChunkReassembler { - if ttl <= 0 { - ttl = defaultChunkReassemblyTTL - } - if maxChunks <= 0 { - maxChunks = defaultChunkMaxInFlightChunks - } - if maxBytes <= 0 { - maxBytes = defaultChunkMaxInFlightBytes - } - reassembler := &publishChunkReassembler{ - ttl: ttl, - sweepInterval: chunkReassemblySweepInterval(ttl), - maxChunks: maxChunks, - maxBytes: maxBytes, - maxChunkBytes: defaultChunkMaxChunkBytes, - maxAssembly: defaultChunkMaxAssemblyChunks, - assemblies: make(map[string]*publishChunkAssembly), - stopCh: make(chan struct{}), - doneCh: make(chan struct{}), - } - go reassembler.runEvictionLoop() - return reassembler -} - -//nolint:gocyclo,lll -func (r *publishChunkReassembler) Add(req *transportpb.PublishRequest) (*transportpb.PublishRequest, bool, error) { - if r == nil || req == nil { - return nil, false, nil - } - env := req.GetEnvelope() - if !isChunkedEnvelope(env) { - return req, true, nil - } - if env == nil { - return nil, false, fmt.Errorf("chunked publish request missing envelope") - } - chunkID := strings.TrimSpace(env.GetChunkId()) - if chunkID == "" { - return nil, false, fmt.Errorf("chunked publish request missing chunk_id") - } - streamID := strings.TrimSpace(env.GetStreamId()) - if streamID == "" { - return nil, false, fmt.Errorf("chunked publish request missing stream_id") - } - count := env.GetChunkCount() - index := env.GetChunkIndex() - if count == 0 { - return nil, false, fmt.Errorf("chunked publish request missing chunk_count") - } - if index >= count { - return nil, false, fmt.Errorf("chunk_index %d out of range (count %d)", index, count) - } - if maxAssembly := r.maxAssembly; maxAssembly > 0 && count > uint32(maxAssembly) { - return nil, false, fmt.Errorf("chunk_count %d exceeds max chunks per assembly %d", count, maxAssembly) - } - - binary := req.GetBinary() - if binary == nil { - return nil, false, fmt.Errorf("chunked publish request missing binary frame") - } - payload := binary.GetPayload() - if maxChunk := r.maxChunkBytes; maxChunk > 0 && len(payload) > maxChunk { - return nil, false, fmt.Errorf("chunk payload %d exceeds max chunk size %d", len(payload), maxChunk) - } - if env.GetChunkBytes() > 0 && int(env.GetChunkBytes()) != len(payload) { - return nil, false, fmt.Errorf("chunk_bytes %d does not match payload size %d", env.GetChunkBytes(), len(payload)) - } - - key := chunkKey(streamID, env.GetPartition(), chunkID) - now := time.Now() - r.mu.Lock() - defer r.mu.Unlock() - r.evictExpiredLocked(now) - assembly := r.assemblies[key] - if assembly == nil { - if len(r.assemblies) >= r.maxChunks { - return nil, false, fmt.Errorf("too many chunk assemblies in flight") - } - if r.inflightBytes+len(payload) > r.maxBytes { - return nil, false, fmt.Errorf("chunk reassembly buffer exceeded (%d > %d)", r.inflightBytes+len(payload), r.maxBytes) - } - assembly = &publishChunkAssembly{ - lastSeen: now, - expected: count, - totalBytes: env.GetTotalBytes(), - base: proto.Clone(req).(*transportpb.PublishRequest), - chunks: make([][]byte, count), - } - r.assemblies[key] = assembly - } - - if assembly.expected != count { - return nil, false, fmt.Errorf("chunk_count mismatch for %s: %d != %d", chunkID, count, assembly.expected) - } - if assembly.chunks[index] != nil { - // Duplicate chunk; ignore. - assembly.lastSeen = now - return nil, false, nil - } - if r.inflightBytes+len(payload) > r.maxBytes { - return nil, false, fmt.Errorf("chunk reassembly buffer exceeded (%d > %d)", r.inflightBytes+len(payload), r.maxBytes) - } - assembly.chunks[index] = append([]byte(nil), payload...) - assembly.received++ - assembly.receivedByte += len(payload) - assembly.lastSeen = now - r.inflightBytes += len(payload) - - if assembly.received < assembly.expected { - return nil, false, nil - } - - assembled, err := reassemblePublishRequest(assembly) - if err != nil { - delete(r.assemblies, key) - r.inflightBytes -= assembly.receivedByte - return nil, false, err - } - delete(r.assemblies, key) - r.inflightBytes -= assembly.receivedByte - return assembled, true, nil -} - -func chunkReassemblySweepInterval(ttl time.Duration) time.Duration { - if ttl <= 0 { - return 0 - } - interval := ttl / 2 - if interval <= 0 { - return ttl - } - return interval -} - -func (r *publishChunkReassembler) runEvictionLoop() { - if r == nil { - return - } - defer close(r.doneCh) - if r.ttl <= 0 || r.sweepInterval <= 0 { - return - } - ticker := time.NewTicker(r.sweepInterval) - defer ticker.Stop() - for { - select { - case <-ticker.C: - r.evictExpired(time.Now()) - case <-r.stopCh: - return - } - } -} - -func (r *publishChunkReassembler) Stop() { - if r == nil { - return - } - select { - case <-r.doneCh: - return - default: - } - select { - case <-r.stopCh: - default: - close(r.stopCh) - } - <-r.doneCh -} - -func reassemblePublishRequest(assembly *publishChunkAssembly) (*transportpb.PublishRequest, error) { - if assembly == nil || assembly.base == nil { - return nil, fmt.Errorf("chunk assembly missing base request") - } - payloadLen := 0 - for i := uint32(0); i < assembly.expected; i++ { - chunk := assembly.chunks[i] - if chunk == nil { - return nil, fmt.Errorf("chunk %d missing", i) - } - payloadLen += len(chunk) - } - if assembly.totalBytes > 0 && payloadLen != int(assembly.totalBytes) { - return nil, fmt.Errorf("total_bytes %d does not match assembled payload %d", assembly.totalBytes, payloadLen) - } - payload := make([]byte, 0, payloadLen) - for _, chunk := range assembly.chunks { - payload = append(payload, chunk...) - } - cloned := proto.Clone(assembly.base).(*transportpb.PublishRequest) - binary := cloned.GetBinary() - if binary == nil { - return nil, fmt.Errorf("chunked publish request missing binary frame") - } - binary.Payload = payload - clearChunkFields(cloned.GetEnvelope()) - return cloned, nil -} - -func clearChunkFields(env *transportpb.StreamEnvelope) { - if env == nil { - return - } - env.ChunkId = "" - env.ChunkIndex = 0 - env.ChunkCount = 0 - env.ChunkBytes = 0 - env.TotalBytes = 0 -} - -func isChunkedEnvelope(env *transportpb.StreamEnvelope) bool { - if env == nil { - return false - } - return env.GetChunkId() != "" || env.GetChunkCount() > 0 || env.GetChunkIndex() > 0 || env.GetChunkBytes() > 0 || env.GetTotalBytes() > 0 //nolint:lll -} - -func chunkKey(streamID, partition, chunkID string) string { - if partition == "" { - return streamID + "|" + chunkID - } - return streamID + "|" + partition + "|" + chunkID -} - -func (r *publishChunkReassembler) evictExpired(now time.Time) { - if r == nil { - return - } - r.mu.Lock() - defer r.mu.Unlock() - r.evictExpiredLocked(now) -} - -func (r *publishChunkReassembler) evictExpiredLocked(now time.Time) { - if r == nil || r.ttl <= 0 { - return - } - for key, assembly := range r.assemblies { - if assembly == nil { - delete(r.assemblies, key) - continue - } - if now.Sub(assembly.lastSeen) > r.ttl { - delete(r.assemblies, key) - r.inflightBytes -= assembly.receivedByte - } - } -} diff --git a/stream_chunks_test.go b/stream_chunks_test.go deleted file mode 100644 index 7809060..0000000 --- a/stream_chunks_test.go +++ /dev/null @@ -1,360 +0,0 @@ -package sdk - -import ( - "fmt" - "testing" - "time" - - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "github.com/stretchr/testify/require" -) - -func TestPublishChunkReassembler(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - - baseEnv := &transportpb.StreamEnvelope{ - StreamId: "stream-1", - Sequence: 42, - Partition: "p0", - ChunkId: "chunk-1", - ChunkCount: 2, - } - - req1 := &transportpb.PublishRequest{ - Metadata: map[string]string{"k": "v"}, - Envelope: cloneStreamEnvelope(baseEnv), - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("hello"), - MimeType: "application/octet-stream", - }}, - } - req1.Envelope.ChunkIndex = 0 - req1.Envelope.ChunkBytes = uint32(len(req1.GetBinary().GetPayload())) - - result, complete, err := reassembler.Add(req1) - require.NoError(t, err) - require.False(t, complete) - require.Nil(t, result) - - req2 := &transportpb.PublishRequest{ - Metadata: map[string]string{"k": "v"}, - Envelope: cloneStreamEnvelope(baseEnv), - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("world"), - MimeType: "application/octet-stream", - }}, - } - req2.Envelope.ChunkIndex = 1 - req2.Envelope.ChunkBytes = uint32(len(req2.GetBinary().GetPayload())) - - result, complete, err = reassembler.Add(req2) - require.NoError(t, err) - require.True(t, complete) - require.NotNil(t, result) - - binary := result.GetBinary() - require.NotNil(t, binary) - require.Equal(t, []byte("helloworld"), binary.GetPayload()) - require.Equal(t, "application/octet-stream", binary.GetMimeType()) - - env := result.GetEnvelope() - require.NotNil(t, env) - require.Equal(t, "stream-1", env.GetStreamId()) - require.Equal(t, uint64(42), env.GetSequence()) - require.Equal(t, "p0", env.GetPartition()) - require.Empty(t, env.GetChunkId()) - require.Equal(t, uint32(0), env.GetChunkCount()) - require.Equal(t, uint32(0), env.GetChunkIndex()) -} - -func TestPublishChunkReassemblerRejectsExcessiveChunkCount(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-1", - ChunkId: "chunk-1", - ChunkCount: uint32(defaultChunkMaxAssemblyChunks + 1), - ChunkIndex: 0, - ChunkBytes: 1, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("a"), - }}, - } - - _, _, err := reassembler.Add(req) - require.ErrorContains(t, err, "exceeds max chunks per assembly") -} - -func TestPublishChunkReassemblerEnforcesByteLimitAcrossChunks(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 6) - t.Cleanup(reassembler.Stop) - - baseEnv := &transportpb.StreamEnvelope{ - StreamId: "stream-1", - ChunkId: "chunk-1", - ChunkCount: 2, - } - - req1 := &transportpb.PublishRequest{ - Envelope: cloneStreamEnvelope(baseEnv), - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("four"), - }}, - } - req1.Envelope.ChunkIndex = 0 - req1.Envelope.ChunkBytes = uint32(len(req1.GetBinary().GetPayload())) - - _, complete, err := reassembler.Add(req1) - require.NoError(t, err) - require.False(t, complete) - - req2 := &transportpb.PublishRequest{ - Envelope: cloneStreamEnvelope(baseEnv), - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("four"), - }}, - } - req2.Envelope.ChunkIndex = 1 - req2.Envelope.ChunkBytes = uint32(len(req2.GetBinary().GetPayload())) - - _, _, err = reassembler.Add(req2) - require.ErrorContains(t, err, "chunk reassembly buffer exceeded") -} - -func TestPublishChunkReassemblerDuplicateChunkIsIdempotent(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-1", - ChunkId: "chunk-dup", - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 5, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("hello"), - }}, - } - - _, complete, err := reassembler.Add(req) - require.NoError(t, err) - require.False(t, complete) - - // Send same chunk again — should be no-op - _, complete, err = reassembler.Add(req) - require.NoError(t, err) - require.False(t, complete) - - // Inflight bytes should count only once - require.Equal(t, 5, reassembler.inflightBytes) -} - -func TestPublishChunkReassemblerMaxAssembliesInFlight(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - - // Override maxChunks to a small value for testing - reassembler.maxChunks = 2 - - for i := range uint32(2) { - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: fmt.Sprintf("stream-%d", i), - ChunkId: fmt.Sprintf("chunk-%d", i), - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 1, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("x"), - }}, - } - _, _, err := reassembler.Add(req) - require.NoError(t, err) - } - - // Third assembly should be rejected - overflow := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-overflow", - ChunkId: "chunk-overflow", - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 1, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("x"), - }}, - } - _, _, err := reassembler.Add(overflow) - require.ErrorContains(t, err, "too many chunk assemblies in flight") -} - -func TestPublishChunkReassemblerMaxChunkBytes(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - reassembler.maxChunkBytes = 3 - - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-1", - ChunkId: "chunk-big", - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 10, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("toolarge!!"), - }}, - } - - _, _, err := reassembler.Add(req) - require.ErrorContains(t, err, "exceeds max chunk size") -} - -func TestPublishChunkReassemblerValidationErrors(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - - tests := []struct { - name string - req *transportpb.PublishRequest - wantErr string - }{ - { - name: "nil request", - req: nil, - wantErr: "", - }, - { - name: "zero chunk_count", - req: &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "s", - ChunkId: "c", - ChunkCount: 0, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{Payload: []byte("a")}}, - }, - wantErr: "missing chunk_count", - }, - { - name: "chunk_index out of range", - req: &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "s", - ChunkId: "c", - ChunkCount: 2, - ChunkIndex: 5, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{Payload: []byte("a")}}, - }, - wantErr: "out of range", - }, - { - name: "missing binary frame", - req: &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "s", - ChunkId: "c", - ChunkCount: 2, - ChunkIndex: 0, - }, - }, - wantErr: "missing binary frame", - }, - { - name: "chunk_bytes mismatch", - req: &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "s", - ChunkId: "c", - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 999, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{Payload: []byte("a")}}, - }, - wantErr: "does not match payload size", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result, complete, err := reassembler.Add(tt.req) - if tt.wantErr == "" { - require.NoError(t, err) - require.Nil(t, result) - require.False(t, complete) - } else { - require.ErrorContains(t, err, tt.wantErr) - } - }) - } -} - -func TestPublishChunkReassemblerChunkCountMismatch(t *testing.T) { - reassembler := newPublishChunkReassembler(time.Minute, 0, 0) - t.Cleanup(reassembler.Stop) - - req1 := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "s", - ChunkId: "c", - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 1, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{Payload: []byte("a")}}, - } - _, _, err := reassembler.Add(req1) - require.NoError(t, err) - - req2 := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "s", - ChunkId: "c", - ChunkCount: 3, // different count - ChunkIndex: 1, - ChunkBytes: 1, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{Payload: []byte("b")}}, - } - _, _, err = reassembler.Add(req2) - require.ErrorContains(t, err, "chunk_count mismatch") -} - -func TestPublishChunkReassemblerEvictsStaleAssembliesWithoutNewChunks(t *testing.T) { - reassembler := newPublishChunkReassembler(20*time.Millisecond, 0, 0) - t.Cleanup(reassembler.Stop) - - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-1", - ChunkId: "stale-chunk", - ChunkCount: 2, - ChunkIndex: 0, - ChunkBytes: 4, - }, - Frame: &transportpb.PublishRequest_Binary{Binary: &transportpb.BinaryFrame{ - Payload: []byte("wait"), - }}, - } - - result, complete, err := reassembler.Add(req) - require.NoError(t, err) - require.False(t, complete) - require.Nil(t, result) - require.Len(t, reassembler.assemblies, 1) - require.Equal(t, len(req.GetBinary().GetPayload()), reassembler.inflightBytes) - - require.Eventually(t, func() bool { - reassembler.mu.Lock() - defer reassembler.mu.Unlock() - return len(reassembler.assemblies) == 0 && reassembler.inflightBytes == 0 - }, 250*time.Millisecond, 10*time.Millisecond) -} diff --git a/stream_connector_test.go b/stream_connector_test.go deleted file mode 100644 index 42c95e3..0000000 --- a/stream_connector_test.go +++ /dev/null @@ -1,1922 +0,0 @@ -package sdk - -import ( - "context" - "io" - "net" - "strconv" - "strings" - "testing" - "time" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/core/contracts" - coretransport "github.com/bubustack/core/runtime/transport" - transportconnector "github.com/bubustack/core/runtime/transport/connector" - "github.com/bubustack/tractatus/envelope" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "github.com/stretchr/testify/require" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/metadata" - "google.golang.org/grpc/test/bufconn" - "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/structpb" -) - -func TestStreamMessageToPublishRequestAudio(t *testing.T) { - msg := engram.StreamMessage{ - Kind: "speech", - MessageID: "audio-1", - Timestamp: time.UnixMilli(1234).UTC(), - Metadata: map[string]string{"storyRun": "sr-01"}, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-1", - Sequence: 42, - Partition: "p0", - }, - Audio: &engram.AudioFrame{ - PCM: []byte{0x00, 0x01}, - SampleRateHz: 16000, - Channels: 1, - Codec: "pcm16", - Timestamp: 10 * time.Millisecond, - }, - } - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - audio := req.GetAudio() - require.NotNil(t, audio) - require.Equal(t, uint64(10), audio.GetTimestampMs()) - require.Equal(t, int32(16000), audio.GetSampleRateHz()) - require.Equal(t, []byte{0x00, 0x01}, audio.GetPcm()) - meta := req.GetMetadata() - require.Equal(t, "sr-01", meta["storyRun"]) - require.Equal(t, "speech", meta[metadataEnvelopeKindKey]) - require.Equal(t, "audio-1", meta[metadataEnvelopeMessageIDKey]) - require.Equal(t, strconv.FormatInt(msg.Timestamp.UTC().UnixMilli(), 10), meta[metadataEnvelopeTimeKey]) - streamEnv := req.GetEnvelope() - require.NotNil(t, streamEnv) - require.Equal(t, "stream-1", streamEnv.GetStreamId()) - require.Equal(t, uint64(42), streamEnv.GetSequence()) - require.Equal(t, "p0", streamEnv.GetPartition()) -} - -func TestPublishRequestToStreamMessageAudio(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - inputs, err := structpb.NewStruct(map[string]any{"baz": 1}) - require.NoError(t, err) - req := &transportpb.PublishRequest{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "speech", - metadataEnvelopeMessageIDKey: "msg-123", - metadataEnvelopeTimeKey: strconv.FormatInt(25, 10), - "storyRun": "sr-55", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "stream-2", - Sequence: 7, - Partition: "us-east-1", - }, - Payload: payload, - Inputs: inputs, - Transports: []*transportpb.TransportDescriptor{ - {Name: "livekit", Kind: "media", Mode: "bi"}, - }, - Frame: &transportpb.PublishRequest_Audio{ - Audio: &transportpb.AudioFrame{ - Pcm: []byte{0x02, 0x03}, - SampleRateHz: 8000, - Channels: 1, - Codec: "g711", - TimestampMs: 25, - }, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.NotNil(t, msg.Audio) - require.Equal(t, []byte{0x02, 0x03}, msg.Audio.PCM) - require.Equal(t, 25*time.Millisecond, msg.Audio.Timestamp) - require.Equal(t, "speech", msg.Kind) - require.Equal(t, "msg-123", msg.MessageID) - require.Equal(t, time.UnixMilli(25).UTC(), msg.Timestamp) - require.Equal(t, map[string]string{"storyRun": "sr-55"}, msg.Metadata) - require.Equal(t, `{"foo":"bar"}`, string(msg.Payload)) - require.Equal(t, `{"baz":1}`, string(msg.Inputs)) - require.Len(t, msg.Transports, 1) - require.Equal(t, "livekit", msg.Transports[0].Name) - require.Equal(t, "media", msg.Transports[0].Kind) - require.NotNil(t, msg.Envelope) - require.Equal(t, "stream-2", msg.Envelope.StreamId) - require.Equal(t, uint64(7), msg.Envelope.Sequence) - require.Equal(t, "us-east-1", msg.Envelope.Partition) -} - -func TestPublishRequestToStreamMessageRejectsAudioWithoutPCM(t *testing.T) { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Audio{ - Audio: &transportpb.AudioFrame{ - SampleRateHz: 16000, - Channels: 1, - }, - }, - } - - msg, err := publishRequestToStreamMessage(req) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Equal(t, engram.StreamMessage{}, msg) -} - -func TestStreamMessageToPublishRequestEnvelope(t *testing.T) { - msg := engram.StreamMessage{ - Kind: "data", - MessageID: "msg-1", - Timestamp: time.UnixMilli(123).UTC(), - Metadata: map[string]string{"storyRun": "abc"}, - Payload: []byte(`{"foo":"bar"}`), - Inputs: []byte(`{"baz":1}`), - Transports: []engram.TransportDescriptor{ - {Name: "default", Kind: "live"}, - }, - } - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - binary := req.GetBinary() - require.NotNil(t, binary) - require.Equal(t, envelope.MIMEType, binary.GetMimeType()) - env, err := envelope.FromBinaryFrame(binary) - require.NoError(t, err) - require.Equal(t, "data", env.Kind) - require.Equal(t, "msg-1", env.MessageID) - require.Equal(t, int64(123), env.TimestampMs) - require.Equal(t, "abc", env.Metadata["storyRun"]) - require.Equal(t, []byte(`{"foo":"bar"}`), []byte(env.Payload)) - require.Equal(t, []byte(`{"baz":1}`), []byte(env.Inputs)) - require.Len(t, env.Transports, 1) - require.Equal(t, "default", env.Transports[0].Name) -} - -func TestStreamMessageEnvelopeClonesTypedTransportConfig(t *testing.T) { - type nestedMap map[string]string - type nestedSlice []map[string]int - - msg := engram.StreamMessage{ - Transports: []engram.TransportDescriptor{ - { - Name: "primary", - Kind: "livekit", - Config: map[string]any{ - "labels": nestedMap{"room": "alpha"}, - "routes": nestedSlice{{"priority": 1}}, - }, - }, - }, - } - - env := streamMessageEnvelope(msg) - require.NotNil(t, env) - require.Len(t, env.Transports, 1) - - msg.Transports[0].Config["labels"].(nestedMap)["room"] = "beta" - msg.Transports[0].Config["routes"].(nestedSlice)[0]["priority"] = 2 - - labels, ok := env.Transports[0].Config["labels"].(nestedMap) - require.True(t, ok) - require.Equal(t, "alpha", labels["room"]) - - routes, ok := env.Transports[0].Config["routes"].(nestedSlice) - require.True(t, ok) - require.Equal(t, 1, routes[0]["priority"]) -} - -func TestStreamMessageToPublishRequestBinaryFrame(t *testing.T) { - msg := engram.StreamMessage{ - Binary: &engram.BinaryFrame{ - Payload: []byte{0x0A, 0x0B}, - MimeType: "application/octet-stream", - Timestamp: 42 * time.Millisecond, - }, - } - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - binary := req.GetBinary() - require.NotNil(t, binary) - require.Equal(t, []byte{0x0A, 0x0B}, binary.GetPayload()) - require.Equal(t, "application/octet-stream", binary.GetMimeType()) - require.Equal(t, uint64(42), binary.GetTimestampMs()) -} - -func TestStreamMessageToPublishRequestBinaryFrameMirroredPayloadBypassesEnvelope(t *testing.T) { - msg := engram.StreamMessage{ - Payload: []byte{0x0A, 0x0B}, - Binary: &engram.BinaryFrame{ - Payload: []byte{0x0A, 0x0B}, - MimeType: "application/custom-binary", - Timestamp: 7 * time.Millisecond, - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - binary := req.GetBinary() - require.NotNil(t, binary) - require.Equal(t, []byte{0x0A, 0x0B}, binary.GetPayload()) - require.Equal(t, "application/custom-binary", binary.GetMimeType()) - require.Equal(t, uint64(7), binary.GetTimestampMs()) -} - -func TestStreamMessageToPublishRequestRejectsMultipleFrameTypes(t *testing.T) { - msg := engram.StreamMessage{ - Audio: &engram.AudioFrame{PCM: []byte{0x01}}, - Video: &engram.VideoFrame{Payload: []byte{0x02}, Codec: "vp8"}, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsBinaryPayloadMismatch(t *testing.T) { - msg := engram.StreamMessage{ - Payload: []byte(`{"ok":true}`), - Binary: &engram.BinaryFrame{ - Payload: []byte("raw"), - MimeType: "application/octet-stream", - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsErrorKindWithoutPayload(t *testing.T) { - msg := engram.StreamMessage{Kind: engram.StreamMessageKindError} - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsMessageIDWithSurroundingWhitespace(t *testing.T) { - msg := engram.StreamMessage{ - MessageID: " msg-1 ", - Payload: []byte(`{"ok":true}`), - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsEmptyMetadataKey(t *testing.T) { - msg := engram.StreamMessage{ - Payload: []byte(`{"ok":true}`), - Metadata: map[string]string{"": "value"}, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsHeartbeatWithPayload(t *testing.T) { - msg := engram.StreamMessage{ - Kind: engram.StreamMessageKindHeartbeat, - Payload: []byte(`{"ok":true}`), - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsAudioWithoutPCM(t *testing.T) { - msg := engram.StreamMessage{ - Audio: &engram.AudioFrame{ - SampleRateHz: 16000, - Channels: 1, - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsEncodedVideoWithoutCodec(t *testing.T) { - msg := engram.StreamMessage{ - Video: &engram.VideoFrame{ - Payload: []byte{0x02}, - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsRawVideoWithoutDimensions(t *testing.T) { - msg := engram.StreamMessage{ - Video: &engram.VideoFrame{ - Payload: []byte{0x02}, - Raw: true, - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsReservedEnvelopeMimeWithoutEnvelopeFields(t *testing.T) { - msg := engram.StreamMessage{ - Binary: &engram.BinaryFrame{ - Payload: []byte("raw"), - MimeType: envelope.MIMEType, - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsReservedEnvelopeMimeWithParametersWithoutEnvelopeFields(t *testing.T) { - msg := engram.StreamMessage{ - Binary: &engram.BinaryFrame{ - Payload: []byte(`{"ok":true}`), - MimeType: envelope.MIMEType + "; charset=utf-8", - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsInvalidBinaryMimeType(t *testing.T) { - msg := engram.StreamMessage{ - Binary: &engram.BinaryFrame{ - Payload: []byte("raw"), - MimeType: "not a mime type", - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsReservedEnvelopeMimePayloadMismatch(t *testing.T) { - msg := engram.StreamMessage{ - Kind: "telemetry", - Binary: &engram.BinaryFrame{ - Payload: []byte("raw"), - MimeType: envelope.MIMEType, - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestStreamMessageToPublishRequestRejectsReservedEnvelopeMimeCaseInsensitivePayloadMismatch(t *testing.T) { - msg := engram.StreamMessage{ - Kind: "telemetry", - Payload: []byte(`{"ok":true}`), - Binary: &engram.BinaryFrame{ - Payload: []byte(`{"ok":false}`), - MimeType: strings.ToUpper(envelope.MIMEType), - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Nil(t, req) -} - -func TestPublishRequestToStreamMessageBinary(t *testing.T) { - envFrame, err := envelope.ToBinaryFrame(&envelope.Envelope{ - Kind: "telemetry", - MessageID: "env-42", - TimestampMs: 900, - Metadata: map[string]string{"step": "test"}, - Payload: []byte(`{"ok":true}`), - }) - require.NoError(t, err) - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: envFrame, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.Equal(t, "telemetry", msg.Kind) - require.Equal(t, "env-42", msg.MessageID) - require.Equal(t, time.UnixMilli(900).UTC(), msg.Timestamp) - require.Equal(t, map[string]string{"step": "test"}, msg.Metadata) - require.Equal(t, []byte(`{"ok":true}`), msg.Payload) - require.NotNil(t, msg.Binary) - require.Equal(t, []byte(`{"ok":true}`), msg.Binary.Payload) - require.Equal(t, defaultEnvelopePayloadMIME, msg.Binary.MimeType) - require.Equal(t, 900*time.Millisecond, msg.Binary.Timestamp) - msg.Payload[0] = 'X' - require.Equal(t, byte('X'), msg.Binary.Payload[0]) -} - -func TestPopulateMessageFromEnvelopeClonesTypedTransportConfig(t *testing.T) { - type nestedMap map[string]string - type nestedSlice []map[string]int - - env := &envelope.Envelope{ - Transports: []envelope.TransportDescriptor{ - { - Name: "primary", - Kind: "livekit", - Config: map[string]any{ - "labels": nestedMap{"room": "alpha"}, - "routes": nestedSlice{{"priority": 1}}, - }, - }, - }, - } - - var msg engram.StreamMessage - populateMessageFromEnvelope(&msg, env) - require.Len(t, msg.Transports, 1) - - env.Transports[0].Config["labels"].(nestedMap)["room"] = "beta" - env.Transports[0].Config["routes"].(nestedSlice)[0]["priority"] = 2 - - labels, ok := msg.Transports[0].Config["labels"].(nestedMap) - require.True(t, ok) - require.Equal(t, "alpha", labels["room"]) - - routes, ok := msg.Transports[0].Config["routes"].(nestedSlice) - require.True(t, ok) - require.Equal(t, 1, routes[0]["priority"]) -} - -func TestPublishRequestToStreamMessageRejectsEncodedVideoWithoutCodec(t *testing.T) { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Video{ - Video: &transportpb.VideoFrame{ - Payload: []byte{0x02}, - }, - }, - } - - msg, err := publishRequestToStreamMessage(req) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Equal(t, engram.StreamMessage{}, msg) -} - -func TestPublishRequestToStreamMessageRejectsErrorEnvelopeWithoutPayload(t *testing.T) { - envFrame, err := envelope.ToBinaryFrame(&envelope.Envelope{ - Kind: engram.StreamMessageKindError, - }) - require.NoError(t, err) - - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: envFrame, - }, - } - - msg, err := publishRequestToStreamMessage(req) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Equal(t, engram.StreamMessage{}, msg) -} - -func TestPublishRequestToStreamMessageDecodesReservedEnvelopeMimeWithParameters(t *testing.T) { - envFrame, err := envelope.ToBinaryFrame(&envelope.Envelope{ - Kind: "telemetry", - MessageID: "env-params", - Payload: []byte(`{"ok":true}`), - }) - require.NoError(t, err) - envFrame.MimeType = envelope.MIMEType + "; charset=utf-8" - - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: envFrame, - }, - } - - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.Equal(t, "telemetry", msg.Kind) - require.Equal(t, "env-params", msg.MessageID) - require.Equal(t, []byte(`{"ok":true}`), msg.Payload) -} - -func TestPublishRequestToStreamMessageDecodesReservedEnvelopeMimeCaseInsensitive(t *testing.T) { - envFrame, err := envelope.ToBinaryFrame(&envelope.Envelope{ - Kind: "telemetry", - MessageID: "env-upper", - Payload: []byte(`{"ok":true}`), - }) - require.NoError(t, err) - envFrame.MimeType = strings.ToUpper(envelope.MIMEType) - - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: envFrame, - }, - } - - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.Equal(t, "telemetry", msg.Kind) - require.Equal(t, "env-upper", msg.MessageID) - require.Equal(t, []byte(`{"ok":true}`), msg.Payload) -} - -func TestPublishRequestToStreamMessageBinaryPassthrough(t *testing.T) { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - TimestampMs: 7, - }, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.Equal(t, []byte("bin"), msg.Payload) - require.NotNil(t, msg.Binary) - require.Equal(t, "text/plain", msg.Binary.MimeType) - require.Equal(t, 7*time.Millisecond, msg.Binary.Timestamp) -} - -func TestPublishRequestToStreamMessageBinaryDefaultsEmptyMimeType(t *testing.T) { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - TimestampMs: 7, - }, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.NotNil(t, msg.Binary) - require.Equal(t, "application/octet-stream", msg.Binary.MimeType) -} - -func TestPublishRequestToStreamMessageRejectsInvalidBinaryMimeType(t *testing.T) { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "not a mime type", - }, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.ErrorIs(t, err, engram.ErrInvalidStreamMessage) - require.Equal(t, engram.StreamMessage{}, msg) -} - -func TestPublishRequestToStreamMessageBinaryPassthroughRoundTrip(t *testing.T) { - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - TimestampMs: 7, - }, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - - roundTrip, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - require.NotNil(t, roundTrip.GetBinary()) - require.Equal(t, []byte("bin"), roundTrip.GetBinary().GetPayload()) - require.Equal(t, "text/plain", roundTrip.GetBinary().GetMimeType()) - require.Nil(t, roundTrip.GetPayload()) - require.Nil(t, roundTrip.GetInputs()) -} - -func TestPublishRequestToStreamMessageMetadataFallback(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - req := &transportpb.PublishRequest{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - metadataEnvelopeMessageIDKey: "xyz", - metadataEnvelopeTimeKey: strconv.FormatInt(123, 10), - "storyRun": "sr-2", - }, - Payload: payload, - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - msg, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.Equal(t, "data", msg.Kind) - require.Equal(t, "xyz", msg.MessageID) - require.Equal(t, time.UnixMilli(123).UTC(), msg.Timestamp) - require.Equal(t, map[string]string{"storyRun": "sr-2"}, msg.Metadata) - require.Equal(t, []byte("bin"), msg.Payload) -} - -func TestShouldSubscribeBinary(t *testing.T) { - t.Run("nil info defaults to true", func(t *testing.T) { - require.True(t, shouldSubscribeBinary(bindingReference{})) - }) - t.Run("explicit binary types", func(t *testing.T) { - ref := bindingReference{Info: &transportpb.BindingInfo{BinaryTypes: []string{"application/json"}}} - require.True(t, shouldSubscribeBinary(ref)) - }) - t.Run("audio only does not subscribe binary", func(t *testing.T) { - ref := bindingReference{Info: &transportpb.BindingInfo{AudioCodecs: []string{"pcm16"}}} - require.False(t, shouldSubscribeBinary(ref)) - }) - t.Run("no media declarations defaults to true", func(t *testing.T) { - ref := bindingReference{Info: &transportpb.BindingInfo{}} - require.True(t, shouldSubscribeBinary(ref)) - }) -} - -func TestShouldDropSubscribeMessage(t *testing.T) { - t.Run("drops metadata heartbeat", func(t *testing.T) { - drop, reason := shouldDropSubscribeMessage(engram.StreamMessage{ - Metadata: map[string]string{"bubu-heartbeat": "true"}, - }) - require.True(t, drop) - require.Equal(t, "heartbeat", reason) - }) - - t.Run("drops kind heartbeat", func(t *testing.T) { - drop, reason := shouldDropSubscribeMessage(engram.StreamMessage{Kind: "heartbeat"}) - require.True(t, drop) - require.Equal(t, "heartbeat", reason) - }) - - t.Run("drops noop", func(t *testing.T) { - drop, reason := shouldDropSubscribeMessage(engram.StreamMessage{Kind: "noop"}) - require.True(t, drop) - require.Equal(t, "noop", reason) - }) - - t.Run("drops fully empty packet", func(t *testing.T) { - drop, reason := shouldDropSubscribeMessage(engram.StreamMessage{}) - require.True(t, drop) - require.Equal(t, "empty", reason) - }) - - t.Run("keeps audio packet", func(t *testing.T) { - drop, reason := shouldDropSubscribeMessage(engram.StreamMessage{ - Audio: &engram.AudioFrame{PCM: []byte{0x01}}, - }) - require.False(t, drop) - require.Equal(t, "", reason) - }) - - t.Run("keeps payload packet", func(t *testing.T) { - drop, reason := shouldDropSubscribeMessage(engram.StreamMessage{ - Payload: []byte(`{"hook":{"event":"storyrun.ready"}}`), - }) - require.False(t, drop) - require.Equal(t, "", reason) - }) -} - -func TestConnectorControlLoop_DefaultHandlerAck(t *testing.T) { - srv := &stubConnectorServer{ - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_START, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - require.Equal(t, "start", resp.GetMetadata()["type"]) - require.Equal(t, "false", resp.GetMetadata()["handled"]) - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - opts := streamRuntimeOptions{messageTimeout: time.Second} - err := connectorControlLoop(ctx, client, bindingReference{}, defaultControlDirectiveHandler{}, opts) - require.ErrorIs(t, err, io.EOF) -} - -func TestConnectorControlLoop_DefaultHandlerCapabilities(t *testing.T) { - srv := &stubConnectorServer{ - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_CAPABILITIES, - Metadata: map[string]string{ - "audio.codec": "pcm16", - }, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - require.Equal(t, "connector.capabilities", resp.GetMetadata()["type"]) - require.Equal(t, "true", resp.GetMetadata()["handled"]) - require.Equal(t, "capabilities", resp.GetMetadata()["reason"]) - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - opts := streamRuntimeOptions{messageTimeout: time.Second} - err := connectorControlLoop(ctx, client, bindingReference{}, defaultControlDirectiveHandler{}, opts) - require.ErrorIs(t, err, io.EOF) -} - -func TestConnectorControlLoop_CustomHandler(t *testing.T) { - srv := &stubConnectorServer{ - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_HEARTBEAT, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_UNSPECIFIED, resp.GetAction()) - require.Equal(t, "custom", resp.GetCustomAction()) - require.Equal(t, "true", resp.GetMetadata()["ok"]) - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - handler := &recordingControlHandler{seen: make(chan engram.ControlDirective, 1)} - opts := streamRuntimeOptions{messageTimeout: time.Second} - err := connectorControlLoop(ctx, client, bindingReference{}, handler, opts) - require.ErrorIs(t, err, io.EOF) - select { - case directive := <-handler.seen: - require.Equal(t, "heartbeat", directive.Type) - case <-time.After(time.Second): - t.Fatal("expected directive to reach handler") - } -} - -func TestConnectorControlLoop_ClosedControlRequestsChannelDoesNotBusyLoop(t *testing.T) { - srv := &stubConnectorServer{ - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_NOOP, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - defer cancel() - - controlRequests := make(chan *transportpb.ControlRequest) - close(controlRequests) - opts := streamRuntimeOptions{ - messageTimeout: time.Second, - controlRequests: controlRequests, - } - - done := make(chan error, 1) - go func() { - done <- connectorControlLoop(ctx, client, bindingReference{}, defaultControlDirectiveHandler{}, opts) - }() - - select { - case err := <-done: - require.ErrorIs(t, err, io.EOF) - case <-time.After(500 * time.Millisecond): - t.Fatal("expected connectorControlLoop to continue processing when control request channel is closed") - } -} - -func TestConnectorControlLoop_SendsBindingReferenceMetadataOnly(t *testing.T) { - srv := &stubConnectorServer{ - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - md, ok := metadata.FromIncomingContext(stream.Context()) - require.True(t, ok) - require.Equal(t, []string{coretransport.ProtocolVersion}, md.Get(coretransport.ProtocolMetadataKey)) - require.Equal(t, []string{"runtime-ns/binding-a"}, md.Get(controlBindingMetadataKey)) - require.NotContains(t, md.Get(controlBindingMetadataKey)[0], "{") - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_NOOP, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - opts := streamRuntimeOptions{messageTimeout: time.Second} - ref := bindingReference{ - Name: "binding-a", - Namespace: "runtime-ns", - Raw: `{"binding":{"driver":"demo","endpoint":"secret:9000"}}`, - } - err := connectorControlLoop(ctx, client, ref, defaultControlDirectiveHandler{}, opts) - require.ErrorIs(t, err, io.EOF) -} - -type recordingControlHandler struct { - seen chan engram.ControlDirective -} - -func (h *recordingControlHandler) HandleControlDirective( - ctx context.Context, - directive engram.ControlDirective, -) (*engram.ControlDirective, error) { - if h.seen != nil { - h.seen <- directive - } - return &engram.ControlDirective{ - Type: "custom", - Metadata: map[string]string{"ok": "true"}, - }, nil -} - -type startupGatedStreamingEngram struct { - started chan struct{} -} - -func (startupGatedStreamingEngram) Init(context.Context, struct{}, *engram.Secrets) error { return nil } - -func (e startupGatedStreamingEngram) Stream(ctx context.Context, _ <-chan engram.InboundMessage, _ chan<- engram.StreamMessage) error { //nolint:lll - if e.started != nil { - close(e.started) - } - <-ctx.Done() - return ctx.Err() -} - -type stubConnectorServer struct { - transportpb.UnimplementedTransportConnectorServiceServer - data func(stream transportpb.TransportConnectorService_DataServer) error - control func(stream transportpb.TransportConnectorService_ControlServer) error -} - -func (s *stubConnectorServer) Data(stream transportpb.TransportConnectorService_DataServer) error { - if s.data != nil { - return s.data(stream) - } - return nil -} - -func (s *stubConnectorServer) Control(stream transportpb.TransportConnectorService_ControlServer) error { - if s.control != nil { - return s.control(stream) - } - return nil -} - -func newTestTransportConnectorClient( - t *testing.T, - server transportpb.TransportConnectorServiceServer, -) transportpb.TransportConnectorServiceClient { - t.Helper() - listener := bufconn.Listen(1 << 20) - grpcServer := grpc.NewServer() - transportpb.RegisterTransportConnectorServiceServer(grpcServer, server) - go func() { - _ = grpcServer.Serve(listener) - }() - dialer := func(ctx context.Context, _ string) (net.Conn, error) { - return listener.Dial() - } - conn, err := grpc.NewClient( - "passthrough:///bufnet", - grpc.WithContextDialer(dialer), - grpc.WithTransportCredentials(insecure.NewCredentials()), - ) - require.NoError(t, err) - waitCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - require.NoError(t, transportconnector.WaitForReady(waitCtx, conn)) - t.Cleanup(func() { - _ = conn.Close() - grpcServer.Stop() - }) - return transportpb.NewTransportConnectorServiceClient(conn) -} - -func TestRunTransportSessionFailsWhenConnectorReadyMissingStartupCapabilitiesMetadata(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - allowReady := make(chan struct{}) - recvErrCh := make(chan error, 1) - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - <-stream.Context().Done() - return nil - }, - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - <-allowReady - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY, - })) - _, err := stream.Recv() - recvErrCh <- err - <-stream.Context().Done() - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - started := make(chan struct{}) - ctx := t.Context() - - errCh := make(chan error, 1) - go func() { - errCh <- runTransportSession( - ctx, - "connector:9000", - bindingReference{}, - startupGatedStreamingEngram{started: started}, - newEnvResolver(map[string]string{ - contracts.GRPCMessageTimeoutEnv: "250ms", - }), - ) - }() - - select { - case <-started: - t.Fatal("expected Engram.Stream not to start before connector.ready") - case <-time.After(100 * time.Millisecond): - } - - close(allowReady) - - select { - case err := <-errCh: - require.Error(t, err) - require.ErrorContains(t, err, "invalid connector.ready startup metadata") - require.ErrorContains(t, err, coretransport.StartupCapabilitiesMetadataKey) - case <-time.After(time.Second): - t.Fatal("expected runTransportSession to fail when startup capability metadata is missing") - } - - select { - case <-started: - t.Fatal("expected Engram.Stream not to start when startup capability metadata is missing") - default: - } - - select { - case recvErr := <-recvErrCh: - require.Error(t, recvErr) - require.ErrorContains(t, recvErr, "context canceled") - case <-time.After(time.Second): - t.Fatal("expected control stream recv to unblock after startup failure") - } -} - -func TestRunTransportSessionFailsWhenConnectorReadyStartupCapabilitiesMetadataInvalid(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - recvErrCh := make(chan error, 1) - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - <-stream.Context().Done() - return nil - }, - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY, - Metadata: map[string]string{ - coretransport.StartupCapabilitiesMetadataKey: "legacy", - }, - })) - _, err := stream.Recv() - recvErrCh <- err - <-stream.Context().Done() - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - started := make(chan struct{}) - err := runTransportSession( - context.Background(), - "connector:9000", - bindingReference{}, - startupGatedStreamingEngram{started: started}, - newEnvResolver(nil), - ) - require.Error(t, err) - require.ErrorContains(t, err, "invalid connector.ready startup metadata") - require.ErrorContains(t, err, "invalid "+coretransport.StartupCapabilitiesMetadataKey) - select { - case <-started: - t.Fatal("expected Engram.Stream not to start when startup capability metadata is invalid") - default: - } - select { - case recvErr := <-recvErrCh: - require.Error(t, recvErr) - require.ErrorContains(t, recvErr, "context canceled") - case <-time.After(time.Second): - t.Fatal("expected control stream recv to unblock after startup failure") - } -} - -func TestRunTransportSessionFailsWhenConnectorReadyHandshakeMissing(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - <-stream.Context().Done() - return nil - }, - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - <-stream.Context().Done() - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - started := make(chan struct{}) - err := runTransportSession( - context.Background(), - "connector:9000", - bindingReference{}, - startupGatedStreamingEngram{started: started}, - newEnvResolver(map[string]string{ - contracts.GRPCMessageTimeoutEnv: "15ms", - }), - ) - require.ErrorIs(t, err, errControlStartupHandshakeTimeout) - select { - case <-started: - t.Fatal("expected Engram.Stream not to start when connector.ready handshake is missing") - default: - } -} - -func TestRunTransportSessionWaitsForStartupCapabilitiesWhenReadyRequiresThem(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - allowCapabilities := make(chan struct{}) - readyAcked := make(chan struct{}) - capabilitiesAcked := make(chan struct{}) - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - <-stream.Context().Done() - return nil - }, - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY, - Metadata: map[string]string{ - coretransport.StartupCapabilitiesMetadataKey: coretransport.StartupCapabilitiesRequired, - }, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - require.Equal(t, "connector.ready", resp.GetMetadata()["type"]) - close(readyAcked) - - <-allowCapabilities - - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_CAPABILITIES, - Metadata: map[string]string{ - "audio.codec": "pcm16", - }, - })) - resp, err = stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - require.Equal(t, "connector.capabilities", resp.GetMetadata()["type"]) - close(capabilitiesAcked) - - <-stream.Context().Done() - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - started := make(chan struct{}) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errCh := make(chan error, 1) - go func() { - errCh <- runTransportSession( - ctx, - "connector:9000", - bindingReference{}, - startupGatedStreamingEngram{started: started}, - newEnvResolver(nil), - ) - }() - - select { - case <-readyAcked: - case <-time.After(time.Second): - t.Fatal("expected connector.ready acknowledgement") - } - - select { - case <-started: - t.Fatal("expected Engram.Stream not to start before required startup capabilities arrive") - case <-time.After(100 * time.Millisecond): - } - - close(allowCapabilities) - - select { - case <-started: - case <-time.After(time.Second): - t.Fatal("expected Engram.Stream to start after startup capabilities arrive") - } - - select { - case <-capabilitiesAcked: - case <-time.After(time.Second): - t.Fatal("expected connector.capabilities acknowledgement") - } - - cancel() - - select { - case err := <-errCh: - require.Error(t, err) - require.ErrorContains(t, err, "context canceled") - case <-time.After(time.Second): - t.Fatal("expected runTransportSession to exit after cancellation") - } -} - -func TestRunTransportSessionFailsWhenRequiredStartupCapabilitiesMissing(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - <-stream.Context().Done() - return nil - }, - control: func(stream transportpb.TransportConnectorService_ControlServer) error { - require.NoError(t, stream.Send(&transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY, - Metadata: map[string]string{ - coretransport.StartupCapabilitiesMetadataKey: coretransport.StartupCapabilitiesRequired, - }, - })) - resp, err := stream.Recv() - require.NoError(t, err) - require.Equal(t, transportpb.ControlAction_CONTROL_ACTION_ACK, resp.GetAction()) - require.Equal(t, "connector.ready", resp.GetMetadata()["type"]) - <-stream.Context().Done() - return nil - }, - } - client := newTestTransportConnectorClient(t, srv) - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - started := make(chan struct{}) - err := runTransportSession( - context.Background(), - "connector:9000", - bindingReference{}, - startupGatedStreamingEngram{started: started}, - newEnvResolver(map[string]string{ - contracts.GRPCMessageTimeoutEnv: "15ms", - }), - ) - require.ErrorIs(t, err, errControlStartupHandshakeTimeout) - select { - case <-started: - t.Fatal("expected Engram.Stream not to start when required startup capabilities are missing") - default: - } -} - -func TestConnectorDataRecvLoop_DeduplicatesByEnvelopeSequence(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - resp := &transportpb.DataResponse{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 9, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataResponse_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - require.NoError(t, stream.Send(resp)) - require.NoError(t, stream.Send(resp)) - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage, 4) - require.NoError(t, connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{})) - require.Len(t, in, 1) - - msg := <-in - require.NotNil(t, msg.Envelope) - require.Equal(t, "downstream-step", msg.Envelope.GetStreamId()) - require.Equal(t, uint64(9), msg.Envelope.GetSequence()) - require.Equal(t, "p0", msg.Envelope.GetPartition()) -} - -func TestConnectorDataRecvLoop_PendingDeduperCapacityExceededReturnsOverflow(t *testing.T) { - mkResp := func(seq uint64) *transportpb.DataResponse { - payload, err := structpb.NewStruct(map[string]any{"seq": seq}) - require.NoError(t, err) - return &transportpb.DataResponse{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: seq, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataResponse_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - } - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - for _, seq := range []uint64{1, 2, 3} { - require.NoError(t, stream.Send(mkResp(seq))) - } - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage, 8) - err = connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{ - packetDeduper: newPacketDeduper(2), - }) - require.ErrorIs(t, err, errPacketDeduperPendingOverflow) - require.Len(t, in, 2) -} - -func TestConnectorDataRecvLoop_ChannelSendTimeoutDropDoesNotSuppressLaterDuplicateInSameSession(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - - packet := &transportpb.DataResponse{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 9, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataResponse_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - require.NoError(t, stream.Send(packet)) - time.Sleep(25 * time.Millisecond) - require.NoError(t, stream.Send(packet)) - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage) - delivered := make(chan engram.InboundMessage, 1) - go func() { - time.Sleep(15 * time.Millisecond) - delivered <- <-in - }() - - require.NoError(t, connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{ - channelSendTimeout: 10 * time.Millisecond, - })) - - select { - case msg := <-delivered: - require.NotNil(t, msg.Envelope) - require.Equal(t, "downstream-step", msg.Envelope.GetStreamId()) - require.Equal(t, uint64(9), msg.Envelope.GetSequence()) - require.Equal(t, "p0", msg.Envelope.GetPartition()) - case <-time.After(500 * time.Millisecond): - t.Fatal("expected duplicate packet replay to be delivered after initial timeout-drop") - } -} - -func TestConnectorDataRecvLoop_KeepsPartitionScopedSequencesDistinct(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - for _, partition := range []string{"p0", "p1"} { - require.NoError(t, stream.Send(&transportpb.DataResponse{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 7, - Partition: partition, - }, - Payload: payload, - Frame: &transportpb.DataResponse_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte(partition), - MimeType: "text/plain", - }, - }, - })) - } - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage, 4) - require.NoError(t, connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{})) - require.Len(t, in, 2) - - first := <-in - second := <-in - require.NotNil(t, first.Envelope) - require.NotNil(t, second.Envelope) - require.Equal(t, uint64(7), first.Envelope.GetSequence()) - require.Equal(t, uint64(7), second.Envelope.GetSequence()) - require.NotEqual(t, first.Envelope.GetPartition(), second.Envelope.GetPartition()) -} - -func TestConnectorDataRecvLoop_DeduplicatesHookMessageIDWithoutSequence(t *testing.T) { - payload := &envelope.Envelope{ - Version: envelope.LatestVersion, - Kind: envelope.KindHook, - MessageID: "ns/storyrun:storyrun.ready", - Payload: []byte(`{"type":"storyrun.ready","hook":{"event":"storyrun.ready"}}`), - Inputs: []byte(`{"userPrompt":"say hi"}`), - } - frame, err := envelope.ToBinaryFrame(payload) - require.NoError(t, err) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - resp := &transportpb.DataResponse{ - Metadata: map[string]string{ - "kind": envelope.KindHook, - metadataEnvelopeMessageIDKey: "ns/storyrun:storyrun.ready", - }, - Frame: &transportpb.DataResponse_Binary{Binary: frame}, - } - require.NoError(t, stream.Send(resp)) - require.NoError(t, stream.Send(resp)) - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage, 4) - require.NoError(t, connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{})) - require.Len(t, in, 1) - - msg := <-in - require.Equal(t, envelope.KindHook, msg.Kind) - require.Equal(t, "ns/storyrun:storyrun.ready", msg.MessageID) - require.JSONEq(t, `{"type":"storyrun.ready","hook":{"event":"storyrun.ready"}}`, string(msg.Payload)) - require.JSONEq(t, `{"userPrompt":"say hi"}`, string(msg.Inputs)) -} - -func TestConnectorDataRecvLoop_DeduplicatesExplicitMessageIDWithoutSequence(t *testing.T) { - payload := &envelope.Envelope{ - Version: envelope.LatestVersion, - Kind: "data", - MessageID: "shared-message-id", - Payload: []byte(`{"ok":true}`), - } - frame, err := envelope.ToBinaryFrame(payload) - require.NoError(t, err) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - resp := &transportpb.DataResponse{ - Metadata: map[string]string{ - "kind": "data", - metadataEnvelopeMessageIDKey: "shared-message-id", - }, - Frame: &transportpb.DataResponse_Binary{Binary: frame}, - } - require.NoError(t, stream.Send(resp)) - require.NoError(t, stream.Send(resp)) - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage, 4) - require.NoError(t, connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{})) - require.Len(t, in, 1) - - msg := <-in - require.Equal(t, "shared-message-id", msg.MessageID) - require.JSONEq(t, `{"ok":true}`, string(msg.Payload)) -} - -func TestConnectorDataRecvLoop_EmitsDeliveryReceiptForSequencedPackets(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - packet := &transportpb.DataPacket{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 5, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataPacket_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - expectedSize := proto.Size(packet) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - require.NoError(t, stream.Send(&transportpb.DataResponse{ - Metadata: packet.GetMetadata(), - Envelope: packet.GetEnvelope(), - Payload: packet.GetPayload(), - Frame: &transportpb.DataResponse_Binary{Binary: packet.GetBinary()}, - })) - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - stream, err := client.Data(ctx) - require.NoError(t, err) - - in := make(chan engram.InboundMessage, 2) - receipts := make(chan *transportpb.ControlRequest, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{controlRequests: receipts})) - require.Len(t, in, 1) - - msg := <-in - select { - case receipt := <-receipts: - t.Fatalf("unexpected receipt before processing completion: %+v", receipt) - default: - } - - msg.Done() - receipt := <-receipts - require.Equal(t, downstreamDeliveryReceiptType, receipt.GetCustomAction()) - require.Equal(t, "downstream-step", receipt.GetMetadata()[deliveryReceiptStreamIDKey]) - require.Equal(t, "5", receipt.GetMetadata()[deliveryReceiptSequenceKey]) - require.Equal(t, "p0", receipt.GetMetadata()[deliveryReceiptPartitionKey]) - require.Equal(t, strconv.Itoa(expectedSize), receipt.GetMetadata()[deliveryReceiptSizeBytesKey]) - - msg.Done() - select { - case duplicate := <-receipts: - t.Fatalf("unexpected duplicate receipt: %+v", duplicate) - default: - } -} - -func TestConnectorDataRecvLoop_PartialChunkTrafficKeepsSessionAlive(t *testing.T) { - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - base := &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 12, - Partition: "p0", - ChunkId: "chunk-keepalive", - ChunkCount: 3, - } - parts := []string{"hel", "lo-", "world"} - for idx, part := range parts { - env := cloneStreamEnvelope(base) - env.ChunkIndex = uint32(idx) - env.ChunkBytes = uint32(len(part)) - require.NoError(t, stream.Send(&transportpb.DataResponse{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: env, - Frame: &transportpb.DataResponse_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte(part), - MimeType: "text/plain", - }, - }, - })) - if idx < len(parts)-1 { - time.Sleep(45 * time.Millisecond) - } - } - return nil - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - stream, err := client.Data(ctx) - require.NoError(t, err) - - hw := newHangWatcher(ctx, 70*time.Millisecond, cancel) - defer hw.Stop() - - in := make(chan engram.InboundMessage, 2) - err = connectorDataRecvLoop(ctx, stream, in, streamRuntimeOptions{ - hangWatcher: hw, - }) - require.NoError(t, err) - require.Len(t, in, 1) - - msg := <-in - require.Equal(t, uint64(12), msg.Envelope.GetSequence()) - require.Equal(t, []byte("hello-world"), msg.Binary.Payload) -} - -func TestConnectorDataRecvLoop_SharedDeduperSuppressesCompletedMessageIDAcrossSessions(t *testing.T) { - payload := &envelope.Envelope{ - Version: envelope.LatestVersion, - Kind: "data", - MessageID: "shared-message-id", - Payload: []byte(`{"ok":true}`), - } - frame, err := envelope.ToBinaryFrame(payload) - require.NoError(t, err) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - return stream.Send(&transportpb.DataResponse{ - Metadata: map[string]string{ - "kind": "data", - metadataEnvelopeMessageIDKey: "shared-message-id", - }, - Frame: &transportpb.DataResponse_Binary{Binary: frame}, - }) - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - sharedDeduper := newPacketDeduper(defaultPacketDedupeEntries) - - stream1, err := client.Data(ctx) - require.NoError(t, err) - first := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream1, first, streamRuntimeOptions{packetDeduper: sharedDeduper})) - require.Len(t, first, 1) - (<-first).Done() - - stream2, err := client.Data(ctx) - require.NoError(t, err) - second := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream2, second, streamRuntimeOptions{packetDeduper: sharedDeduper})) - require.Len(t, second, 0) -} - -func TestConnectorDataRecvLoop_RedeliversUnprocessedMessageIDAcrossSessions(t *testing.T) { - payload := &envelope.Envelope{ - Version: envelope.LatestVersion, - Kind: "data", - MessageID: "shared-message-id", - Payload: []byte(`{"ok":true}`), - } - frame, err := envelope.ToBinaryFrame(payload) - require.NoError(t, err) - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - return stream.Send(&transportpb.DataResponse{ - Metadata: map[string]string{ - "kind": "data", - metadataEnvelopeMessageIDKey: "shared-message-id", - }, - Frame: &transportpb.DataResponse_Binary{Binary: frame}, - }) - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - sharedDeduper := newPacketDeduper(defaultPacketDedupeEntries) - - stream1, err := client.Data(ctx) - require.NoError(t, err) - first := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream1, first, streamRuntimeOptions{packetDeduper: sharedDeduper})) - require.Len(t, first, 1) - - stream2, err := client.Data(ctx) - require.NoError(t, err) - second := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream2, second, streamRuntimeOptions{packetDeduper: sharedDeduper})) - require.Len(t, second, 1) -} - -func TestConnectorDataRecvLoop_RedeliversUnprocessedSequencedPacketAcrossSessions(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - packet := &transportpb.DataPacket{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 5, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataPacket_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - return stream.Send(&transportpb.DataResponse{ - Metadata: packet.GetMetadata(), - Envelope: packet.GetEnvelope(), - Payload: packet.GetPayload(), - Frame: &transportpb.DataResponse_Binary{Binary: packet.GetBinary()}, - }) - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - sharedDeduper := newPacketDeduper(defaultPacketDedupeEntries) - receipts := make(chan *transportpb.ControlRequest, 2) - - stream1, err := client.Data(ctx) - require.NoError(t, err) - first := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream1, first, streamRuntimeOptions{ - packetDeduper: sharedDeduper, - controlRequests: receipts, - })) - require.Len(t, first, 1) - - stream2, err := client.Data(ctx) - require.NoError(t, err) - second := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream2, second, streamRuntimeOptions{ - packetDeduper: sharedDeduper, - controlRequests: receipts, - })) - require.Len(t, second, 1) - - select { - case receipt := <-receipts: - t.Fatalf("unexpected receipt before replayed packet completed: %+v", receipt) - default: - } - - (<-second).Done() - receipt := <-receipts - require.Equal(t, downstreamDeliveryReceiptType, receipt.GetCustomAction()) - require.Equal(t, "downstream-step", receipt.GetMetadata()[deliveryReceiptStreamIDKey]) - require.Equal(t, "5", receipt.GetMetadata()[deliveryReceiptSequenceKey]) - require.Equal(t, "p0", receipt.GetMetadata()[deliveryReceiptPartitionKey]) -} - -func TestConnectorDataRecvLoop_DeliveryReceiptQueueSaturatedDoesNotHangDoneAndAllowsReplay(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - packet := &transportpb.DataPacket{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 5, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataPacket_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - return stream.Send(&transportpb.DataResponse{ - Metadata: packet.GetMetadata(), - Envelope: packet.GetEnvelope(), - Payload: packet.GetPayload(), - Frame: &transportpb.DataResponse_Binary{Binary: packet.GetBinary()}, - }) - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - - sharedDeduper := newPacketDeduper(defaultPacketDedupeEntries) - controlRequests := make(chan *transportpb.ControlRequest, 1) - controlRequests <- &transportpb.ControlRequest{CustomAction: "pre-filled"} - - stream1, err := client.Data(ctx) - require.NoError(t, err) - first := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream1, first, streamRuntimeOptions{ - packetDeduper: sharedDeduper, - controlRequests: controlRequests, - })) - require.Len(t, first, 1) - - doneReturned := make(chan struct{}) - go func() { - (<-first).Done() - close(doneReturned) - }() - select { - case <-doneReturned: - case <-time.After(250 * time.Millisecond): - t.Fatal("expected processing completion to return even when delivery receipt queue is saturated") - } - - stream2, err := client.Data(ctx) - require.NoError(t, err) - second := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream2, second, streamRuntimeOptions{ - packetDeduper: sharedDeduper, - controlRequests: controlRequests, - })) - require.Len(t, second, 1) -} - -func TestConnectorDataRecvLoop_SuppressesCompletedSequencedPacketAcrossSessionsAndResendsReceipt(t *testing.T) { - payload, err := structpb.NewStruct(map[string]any{"foo": "bar"}) - require.NoError(t, err) - packet := &transportpb.DataPacket{ - Metadata: map[string]string{ - metadataEnvelopeKindKey: "data", - }, - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 5, - Partition: "p0", - }, - Payload: payload, - Frame: &transportpb.DataPacket_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: []byte("bin"), - MimeType: "text/plain", - }, - }, - } - - srv := &stubConnectorServer{ - data: func(stream transportpb.TransportConnectorService_DataServer) error { - return stream.Send(&transportpb.DataResponse{ - Metadata: packet.GetMetadata(), - Envelope: packet.GetEnvelope(), - Payload: packet.GetPayload(), - Frame: &transportpb.DataResponse_Binary{Binary: packet.GetBinary()}, - }) - }, - } - - client := newTestTransportConnectorClient(t, srv) - ctx := t.Context() - sharedDeduper := newPacketDeduper(defaultPacketDedupeEntries) - receipts := make(chan *transportpb.ControlRequest, 4) - - stream1, err := client.Data(ctx) - require.NoError(t, err) - first := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream1, first, streamRuntimeOptions{ - packetDeduper: sharedDeduper, - controlRequests: receipts, - })) - require.Len(t, first, 1) - (<-first).Done() - - firstReceipt := <-receipts - require.Equal(t, downstreamDeliveryReceiptType, firstReceipt.GetCustomAction()) - require.Equal(t, "downstream-step", firstReceipt.GetMetadata()[deliveryReceiptStreamIDKey]) - require.Equal(t, "5", firstReceipt.GetMetadata()[deliveryReceiptSequenceKey]) - require.Equal(t, "p0", firstReceipt.GetMetadata()[deliveryReceiptPartitionKey]) - - stream2, err := client.Data(ctx) - require.NoError(t, err) - second := make(chan engram.InboundMessage, 2) - require.NoError(t, connectorDataRecvLoop(ctx, stream2, second, streamRuntimeOptions{ - packetDeduper: sharedDeduper, - controlRequests: receipts, - })) - require.Len(t, second, 0) - - replayReceipt := <-receipts - require.Equal(t, downstreamDeliveryReceiptType, replayReceipt.GetCustomAction()) - require.Equal(t, "downstream-step", replayReceipt.GetMetadata()[deliveryReceiptStreamIDKey]) - require.Equal(t, "5", replayReceipt.GetMetadata()[deliveryReceiptSequenceKey]) - require.Equal(t, "p0", replayReceipt.GetMetadata()[deliveryReceiptPartitionKey]) -} diff --git a/stream_context.go b/stream_context.go deleted file mode 100644 index 7a4e13c..0000000 --- a/stream_context.go +++ /dev/null @@ -1,240 +0,0 @@ -package sdk - -import ( - "encoding/json" - "fmt" - "strconv" - "strings" - "time" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/tractatus/envelope" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "google.golang.org/protobuf/types/known/structpb" -) - -const ( - metadataEnvelopeKindKey = "bubu.envelope.kind" - metadataEnvelopeMessageIDKey = "bubu.envelope.message_id" - metadataEnvelopeTimeKey = "bubu.envelope.timestamp_ms" -) - -func applyStreamContextToPublishRequest(req *transportpb.PublishRequest, msg engram.StreamMessage) error { - if req == nil { - return fmt.Errorf("publish request is nil") - } - - if meta := encodeStreamMetadata(msg); len(meta) > 0 { - req.Metadata = meta - } - - if shouldEncodeStructuredStreamPayload(req) { - payload, err := jsonBytesToStruct(msg.Payload) - if err != nil { - return fmt.Errorf("payload decode failed: %w", err) - } - if payload != nil { - req.Payload = payload - } - - inputs, err := jsonBytesToStruct(msg.Inputs) - if err != nil { - return fmt.Errorf("inputs decode failed: %w", err) - } - if inputs != nil { - req.Inputs = inputs - } - } - - transports, err := transportsToProto(msg.Transports) - if err != nil { - return err - } - if len(transports) > 0 { - req.Transports = transports - } - - if msg.Envelope != nil { - req.Envelope = cloneStreamEnvelope(msg.Envelope) - } - - return nil -} - -func shouldEncodeStructuredStreamPayload(req *transportpb.PublishRequest) bool { - if req == nil { - return true - } - binary := req.GetBinary() - if binary == nil { - return true - } - return strings.TrimSpace(binary.GetMimeType()) == envelope.MIMEType -} - -func mergeRequestContextIntoStreamMessage(req *transportpb.PublishRequest, msg *engram.StreamMessage) error { - if req == nil || msg == nil { - return nil - } - - hydrateStreamMessageMetadata(msg, req.GetMetadata()) - - if len(msg.Payload) == 0 { - payload, err := structToJSONBytes(req.GetPayload()) - if err != nil { - return fmt.Errorf("payload marshal failed: %w", err) - } - if len(payload) > 0 { - msg.Payload = payload - } - } - - if len(msg.Inputs) == 0 { - inputs, err := structToJSONBytes(req.GetInputs()) - if err != nil { - return fmt.Errorf("inputs marshal failed: %w", err) - } - if len(inputs) > 0 { - msg.Inputs = inputs - } - } - - if len(msg.Transports) == 0 { - msg.Transports = transportsFromProto(req.GetTransports()) - } - - if msg.Envelope == nil && req.GetEnvelope() != nil { - msg.Envelope = cloneStreamEnvelope(req.GetEnvelope()) - } - - return nil -} - -func cloneStreamEnvelope(env *transportpb.StreamEnvelope) *transportpb.StreamEnvelope { - if env == nil { - return nil - } - return &transportpb.StreamEnvelope{ - StreamId: env.GetStreamId(), - Sequence: env.GetSequence(), - Partition: env.GetPartition(), - ChunkId: env.GetChunkId(), - ChunkIndex: env.GetChunkIndex(), - ChunkCount: env.GetChunkCount(), - ChunkBytes: env.GetChunkBytes(), - TotalBytes: env.GetTotalBytes(), - } -} - -func encodeStreamMetadata(msg engram.StreamMessage) map[string]string { - meta := cloneStringMap(msg.Metadata) - if msg.Kind == "" && msg.MessageID == "" && msg.Timestamp.IsZero() { - return meta - } - if meta == nil { - meta = make(map[string]string, 3) - } - if msg.Kind != "" { - meta[metadataEnvelopeKindKey] = strings.TrimSpace(msg.Kind) - } - if msg.MessageID != "" { - meta[metadataEnvelopeMessageIDKey] = strings.TrimSpace(msg.MessageID) - } - if !msg.Timestamp.IsZero() { - meta[metadataEnvelopeTimeKey] = strconv.FormatInt(msg.Timestamp.UTC().UnixMilli(), 10) - } - return meta -} - -func hydrateStreamMessageMetadata(msg *engram.StreamMessage, metadata map[string]string) { - if msg == nil || len(metadata) == 0 { - return - } - cloned := cloneStringMap(metadata) - if kind, ok := cloned[metadataEnvelopeKindKey]; ok { - msg.Kind = strings.TrimSpace(kind) - delete(cloned, metadataEnvelopeKindKey) - } - if messageID, ok := cloned[metadataEnvelopeMessageIDKey]; ok { - msg.MessageID = strings.TrimSpace(messageID) - delete(cloned, metadataEnvelopeMessageIDKey) - } - if ts, ok := cloned[metadataEnvelopeTimeKey]; ok { - if parsed, err := strconv.ParseInt(ts, 10, 64); err == nil && parsed > 0 { - msg.Timestamp = time.UnixMilli(parsed).UTC() - } - delete(cloned, metadataEnvelopeTimeKey) - } - if len(cloned) > 0 { - msg.Metadata = cloned - } -} - -func jsonBytesToStruct(data []byte) (*structpb.Struct, error) { - if len(data) == 0 { - return nil, nil - } - var payload map[string]any - if err := json.Unmarshal(data, &payload); err != nil { - return nil, err - } - return structpb.NewStruct(payload) -} - -func structToJSONBytes(st *structpb.Struct) ([]byte, error) { - if st == nil { - return nil, nil - } - return st.MarshalJSON() -} - -func transportsToProto(transports []engram.TransportDescriptor) ([]*transportpb.TransportDescriptor, error) { - if len(transports) == 0 { - return nil, nil - } - out := make([]*transportpb.TransportDescriptor, len(transports)) - for i := range transports { - td := transports[i] - var cfg *structpb.Struct - if len(td.Config) > 0 { - var err error - cfg, err = structpb.NewStruct(td.Config) - if err != nil { - return nil, fmt.Errorf("transport %q config encode failed: %w", td.Name, err) - } - } - out[i] = &transportpb.TransportDescriptor{ - Name: td.Name, - Kind: td.Kind, - Mode: td.Mode, - Config: cfg, - } - } - return out, nil -} - -func transportsFromProto(src []*transportpb.TransportDescriptor) []engram.TransportDescriptor { - if len(src) == 0 { - return nil - } - out := make([]engram.TransportDescriptor, 0, len(src)) - for _, td := range src { - if td == nil { - continue - } - var cfg map[string]any - if st := td.GetConfig(); st != nil { - cfg = cloneConfigMap(st.AsMap()) - } - out = append(out, engram.TransportDescriptor{ - Name: td.GetName(), - Kind: td.GetKind(), - Mode: td.GetMode(), - Config: cfg, - }) - } - if len(out) == 0 { - return nil - } - return out -} diff --git a/stream_contract_test.go b/stream_contract_test.go deleted file mode 100644 index 435bee7..0000000 --- a/stream_contract_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package sdk - -import ( - "encoding/json" - "testing" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/tractatus/envelope" - "github.com/stretchr/testify/require" -) - -// TestStreamMessageToPublishRequestRejectsEmpty verifies that a message with no -// Kind, no payload, no audio/video/binary, and no inputs is rejected. -func TestStreamMessageToPublishRequestRejectsEmpty(t *testing.T) { - msg := engram.StreamMessage{} // truly empty - _, err := streamMessageToPublishRequest(msg) - require.Error(t, err) -} - -// TestStreamMessageToPublishRequestAllowsKindOnly verifies that a metadata-only -// message with a non-empty Kind is published as an envelope (Fix 3.14). -func TestStreamMessageToPublishRequestAllowsKindOnly(t *testing.T) { - msg := engram.StreamMessage{ - Kind: "telemetry", - Metadata: map[string]string{"k": "v"}, - } - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - require.NotNil(t, req.GetBinary(), "expected envelope binary frame") -} - -func TestStreamMessageToPublishRequestEnvelopeForPayload(t *testing.T) { - payload := []byte(`{"ok":true}`) - msg := engram.StreamMessage{ - Kind: "data", - Payload: payload, - } - - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - require.NotNil(t, req.GetBinary()) - - frame := req.GetBinary() - require.Equal(t, envelope.MIMEType, frame.GetMimeType()) - env, err := envelope.FromBinaryFrame(frame) - require.NoError(t, err) - - var decoded map[string]any - require.NoError(t, json.Unmarshal(env.Payload, &decoded)) - require.Equal(t, true, decoded["ok"]) -} - -func TestStreamMessageToPublishRequestRawBinaryWithInputsUsesEnvelope(t *testing.T) { - payload := []byte(`{"text":"hello"}`) - inputs := []byte(`{"userPrompt":"hello"}`) - msg := engram.StreamMessage{ - Kind: "data", - Payload: payload, - Inputs: inputs, - Binary: &engram.BinaryFrame{ - Payload: payload, - MimeType: "application/json", - }, - } - - req, err := streamMessageToPublishRequest(msg) - require.NoError(t, err) - require.NotNil(t, req.GetBinary()) - require.Equal(t, envelope.MIMEType, req.GetBinary().GetMimeType()) - require.NotNil(t, req.GetInputs()) - - roundTrip, err := publishRequestToStreamMessage(req) - require.NoError(t, err) - require.Equal(t, "data", roundTrip.Kind) - require.Equal(t, payload, roundTrip.Payload) - require.Equal(t, inputs, roundTrip.Inputs) - require.NotNil(t, roundTrip.Binary) - require.Equal(t, "application/json", roundTrip.Binary.MimeType) -} diff --git a/stream_error.go b/stream_error.go deleted file mode 100644 index 1ed7cbd..0000000 --- a/stream_error.go +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "encoding/json" - "fmt" - "strings" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bubu-sdk-go/engram" -) - -// NewStreamErrorMessage wraps a StructuredError into a StreamMessage payload with Kind "error". -func NewStreamErrorMessage( - errObj runsv1alpha1.StructuredError, - opts ...StreamMessageOption) (engram.StreamMessage, - error, -) { - raw, err := json.Marshal(errObj) - if err != nil { - return engram.StreamMessage{}, fmt.Errorf("marshal structured error: %w", err) - } - options := append([]StreamMessageOption{WithJSONPayload(raw)}, opts...) - return NewStreamMessage(engram.StreamMessageKindError, options...), nil -} - -// ParseStreamErrorMessage extracts StructuredError payloads from StreamMessage Kind "error". -func ParseStreamErrorMessage(msg engram.StreamMessage) (runsv1alpha1.StructuredError, bool, error) { - if strings.TrimSpace(msg.Kind) != engram.StreamMessageKindError { - return runsv1alpha1.StructuredError{}, false, nil - } - if len(msg.Payload) == 0 { - return runsv1alpha1.StructuredError{}, true, fmt.Errorf("stream error message missing payload") - } - var out runsv1alpha1.StructuredError - if err := json.Unmarshal(msg.Payload, &out); err != nil { - return runsv1alpha1.StructuredError{}, true, fmt.Errorf("stream error payload invalid: %w", err) - } - return out, true, nil -} diff --git a/stream_message_builder.go b/stream_message_builder.go deleted file mode 100644 index 3e0ed95..0000000 --- a/stream_message_builder.go +++ /dev/null @@ -1,128 +0,0 @@ -package sdk - -import ( - "encoding/json" - "maps" - "strings" - "time" - - "github.com/bubustack/bubu-sdk-go/engram" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" -) - -// StreamMessageOption configures an engram.StreamMessage produced via NewStreamMessage. -type StreamMessageOption func(*engram.StreamMessage) - -// NewStreamMessage constructs an engram.StreamMessage pre-populated with the provided options. -// It trims the kind identifier and applies any options in order. Note that metadata-only -// messages are invalid at send time; at least one of audio/video/binary payloads, JSON payload, -// inputs, or transports must be populated for the message to be published. -func NewStreamMessage(kind string, opts ...StreamMessageOption) engram.StreamMessage { - msg := engram.StreamMessage{Kind: strings.TrimSpace(kind)} - for _, opt := range opts { - if opt != nil { - opt(&msg) - } - } - return msg -} - -// WithMessageID sets the message identifier for correlation across steps. -func WithMessageID(id string) StreamMessageOption { - return func(msg *engram.StreamMessage) { - msg.MessageID = strings.TrimSpace(id) - } -} - -// WithTimestamp overrides the message timestamp. Zero values are ignored. -func WithTimestamp(ts time.Time) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if !ts.IsZero() { - msg.Timestamp = ts.UTC() - } - } -} - -// WithMetadata merges the supplied metadata into the message, cloning the map to prevent -// callers from mutating shared state. -func WithMetadata(metadata map[string]string) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if len(metadata) == 0 { - return - } - if msg.Metadata == nil { - msg.Metadata = make(map[string]string, len(metadata)) - } - maps.Copy(msg.Metadata, metadata) - } -} - -// WithJSONPayload attaches a JSON payload (already marshaled). The byte slice is copied. -func WithJSONPayload(payload []byte) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if len(payload) == 0 { - return - } - msg.Payload = copyBytes(payload) - } -} - -// WithInputs attaches CEL-evaluated inputs (already marshaled JSON). -func WithInputs(inputs []byte) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if len(inputs) == 0 { - return - } - msg.Inputs = copyBytes(inputs) - } -} - -// WithTransports records the story's declared transports for downstream inspection. -func WithTransports(descriptors []engram.TransportDescriptor) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if len(descriptors) == 0 { - return - } - msg.Transports = make([]engram.TransportDescriptor, len(descriptors)) - for i := range descriptors { - msg.Transports[i] = descriptors[i].Clone() - } - } -} - -// WithStreamEnvelope attaches transport-layer stream sequencing metadata. -func WithStreamEnvelope(env *transportpb.StreamEnvelope) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if env == nil { - return - } - msg.Envelope = cloneStreamEnvelope(env) - } -} - -// WithBinaryPayload attaches an arbitrary binary payload plus MIME type. The payload is copied. -func WithBinaryPayload(mime string, payload []byte, timestamp time.Duration) StreamMessageOption { - return func(msg *engram.StreamMessage) { - if len(payload) == 0 { - return - } - msg.Binary = &engram.BinaryFrame{ - Payload: copyBytes(payload), - MimeType: strings.TrimSpace(mime), - Timestamp: timestamp, - } - } -} - -// WithJSONData marshals the provided value to JSON and attaches it as the payload. It returns -// an option alongside any marshaling error so callers can handle failures inline. -func WithJSONData(v any) (StreamMessageOption, error) { - if v == nil { - return nil, nil - } - bytes, err := json.Marshal(v) - if err != nil { - return nil, err - } - return WithJSONPayload(bytes), nil -} diff --git a/stream_message_builder_test.go b/stream_message_builder_test.go deleted file mode 100644 index ff0b9af..0000000 --- a/stream_message_builder_test.go +++ /dev/null @@ -1,97 +0,0 @@ -package sdk - -import ( - "testing" - "time" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/stretchr/testify/require" -) - -func TestNewStreamMessageOptions(t *testing.T) { - ts := time.Unix(1717171717, 0).UTC() - msg := NewStreamMessage( - "telemetry", - WithMessageID("abc-123"), - WithTimestamp(ts), - WithMetadata(map[string]string{"foo": "bar"}), - WithJSONPayload([]byte(`{"ok":true}`)), - WithInputs([]byte(`{"with":1}`)), - WithTransports([]engram.TransportDescriptor{{Name: "default", Kind: "livekit"}}), - ) - - require.Equal(t, "telemetry", msg.Kind) - require.Equal(t, "abc-123", msg.MessageID) - require.Equal(t, ts, msg.Timestamp) - require.Equal(t, map[string]string{"foo": "bar"}, msg.Metadata) - require.Equal(t, []byte(`{"ok":true}`), msg.Payload) - require.Equal(t, []byte(`{"with":1}`), msg.Inputs) - require.Len(t, msg.Transports, 1) - require.Equal(t, "default", msg.Transports[0].Name) - require.Equal(t, "livekit", msg.Transports[0].Kind) -} - -func TestWithBinaryPayload(t *testing.T) { - ts := time.Second - msg := NewStreamMessage("binary", WithBinaryPayload("application/octet-stream", []byte{0x01, 0x02}, ts)) - require.NotNil(t, msg.Binary) - require.Equal(t, []byte{0x01, 0x02}, msg.Binary.Payload) - require.Equal(t, "application/octet-stream", msg.Binary.MimeType) - require.Equal(t, ts, msg.Binary.Timestamp) -} - -func TestWithJSONData(t *testing.T) { - opt, err := WithJSONData(map[string]any{"value": 42}) - require.NoError(t, err) - - msg := NewStreamMessage("json", opt) - require.Equal(t, []byte(`{"value":42}`), msg.Payload) -} - -func TestWithJSONDataError(t *testing.T) { - ch := make(chan struct{}) // channels cannot be marshaled - opt, err := WithJSONData(ch) - require.Nil(t, opt) - require.Error(t, err) -} - -func TestWithMetadataCopies(t *testing.T) { - meta := map[string]string{"foo": "bar"} - msg := NewStreamMessage("copy", WithMetadata(meta)) - meta["foo"] = "baz" - require.Equal(t, "bar", msg.Metadata["foo"]) -} - -func TestWithTransportsClone(t *testing.T) { - transports := []engram.TransportDescriptor{{Name: "a"}} - msg := NewStreamMessage("transports", WithTransports(transports)) - require.Equal(t, "a", msg.Transports[0].Name) - transports[0].Name = "b" - require.Equal(t, "a", msg.Transports[0].Name) -} - -func TestNewStreamMessageTrimsKind(t *testing.T) { - msg := NewStreamMessage(" telemetry ") - require.Equal(t, "telemetry", msg.Kind) -} - -func TestWithJSONPayloadCopies(t *testing.T) { - payload := []byte(`{"a":1}`) - msg := NewStreamMessage("payload", WithJSONPayload(payload)) - payload[0] = '!' - require.Equal(t, []byte(`{"a":1}`), msg.Payload) -} - -func TestWithInputsCopies(t *testing.T) { - inputs := []byte(`{"b":2}`) - msg := NewStreamMessage("inputs", WithInputs(inputs)) - inputs[0] = '!' - require.Equal(t, []byte(`{"b":2}`), msg.Inputs) -} - -func TestWithJSONDataNil(t *testing.T) { - opt, err := WithJSONData(nil) - require.NoError(t, err) - msg := NewStreamMessage("nil", opt) - require.Nil(t, msg.Payload) -} diff --git a/stream_test.go b/stream_test.go deleted file mode 100644 index e6fc41d..0000000 --- a/stream_test.go +++ /dev/null @@ -1,1582 +0,0 @@ -package sdk - -import ( - "context" - "errors" - "fmt" - "io" - "runtime" - "strings" - "sync" - "testing" - "time" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/core/contracts" - coretransport "github.com/bubustack/core/runtime/transport" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "google.golang.org/grpc" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/metadata" - "google.golang.org/grpc/status" -) - -func TestResolveChannelBufferSizeFromEnv(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCChannelBufferSizeEnv: "64"}) - if size := resolveChannelBufferSize(env); size != 64 { - t.Fatalf("expected buffer size 64, got %d", size) - } -} - -func TestResolveChannelBufferSizeDefault(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCChannelBufferSizeEnv: "invalid"}) - if size := resolveChannelBufferSize(env); size != DefaultChannelBufferSize { - t.Fatalf("expected default buffer size %d, got %d", DefaultChannelBufferSize, size) - } -} - -func TestResolveReconnectPolicyDefaults(t *testing.T) { - env := newEnvResolver(nil) - policy := resolveReconnectPolicy(env) - if policy.base != defaultReconnectBaseBackoff { - t.Fatalf("expected base backoff %s, got %s", defaultReconnectBaseBackoff, policy.base) - } - if policy.max != defaultReconnectMaxBackoff { - t.Fatalf("expected max backoff %s, got %s", defaultReconnectMaxBackoff, policy.max) - } - if policy.maxRetries != defaultReconnectMaxRetries { - t.Fatalf("expected max retries %d, got %d", defaultReconnectMaxRetries, policy.maxRetries) - } -} - -func TestResolveReconnectPolicyFromEnv(t *testing.T) { - env := newEnvResolver(map[string]string{ - contracts.GRPCReconnectBaseBackoffEnv: "750ms", - contracts.GRPCReconnectMaxBackoffEnv: "45s", - contracts.GRPCReconnectMaxRetriesEnv: "5", - }) - - policy := resolveReconnectPolicy(env) - if policy.base != 750*time.Millisecond { - t.Fatalf("expected base backoff 750ms, got %s", policy.base) - } - if policy.max != 45*time.Second { - t.Fatalf("expected max backoff 45s, got %s", policy.max) - } - if policy.maxRetries != 5 { - t.Fatalf("expected max retries 5, got %d", policy.maxRetries) - } -} - -func TestResolveMessageTimeoutDefault(t *testing.T) { - env := newEnvResolver(nil) - if timeout := resolveMessageTimeout(env); timeout != defaultMessageTimeout { - t.Fatalf("expected default message timeout %s, got %s", defaultMessageTimeout, timeout) - } -} - -func TestResolveMessageTimeoutOverride(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCMessageTimeoutEnv: "45s"}) - if timeout := resolveMessageTimeout(env); timeout != 45*time.Second { - t.Fatalf("expected override timeout 45s, got %s", timeout) - } -} - -func TestResolvePublishHeartbeatIntervalDefault(t *testing.T) { - env := newEnvResolver(nil) - if interval := resolvePublishHeartbeatInterval(env, defaultMessageTimeout); interval != defaultPublishHeartbeatInterval { //nolint:lll - t.Fatalf("expected default publish heartbeat %s, got %s", defaultPublishHeartbeatInterval, interval) - } -} - -func TestResolvePublishHeartbeatIntervalFromEnv(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCHeartbeatIntervalEnv: "7s"}) - if interval := resolvePublishHeartbeatInterval(env, defaultMessageTimeout); interval != 7*time.Second { - t.Fatalf("expected publish heartbeat override 7s, got %s", interval) - } -} - -func TestResolvePublishHeartbeatIntervalClampsBelowMessageTimeout(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCHeartbeatIntervalEnv: "45s"}) - want := 30*time.Second - time.Nanosecond - if interval := resolvePublishHeartbeatInterval(env, 30*time.Second); interval != want { - t.Fatalf("expected clamped publish heartbeat %s, got %s", want, interval) - } -} - -func TestResolveChannelSendTimeout(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCChannelSendTimeoutEnv: "5s"}) - if timeout := resolveChannelSendTimeout(env); timeout != 5*time.Second { - t.Fatalf("expected channel send timeout 5s, got %s", timeout) - } -} - -func TestResolveHangTimeout(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.GRPCHangTimeoutEnv: "0"}) - if timeout := resolveHangTimeout(env); timeout != 0 { - t.Fatalf("expected zero timeout when env is 0, got %s", timeout) - } - env = newEnvResolver(map[string]string{contracts.GRPCHangTimeoutEnv: "20s"}) - if timeout := resolveHangTimeout(env); timeout != 20*time.Second { - t.Fatalf("expected hang timeout 20s, got %s", timeout) - } -} - -func TestDrainStreamMessagesDrainsBufferedValues(t *testing.T) { - out := make(chan engram.StreamMessage, 2) - out <- engram.StreamMessage{Kind: "one"} - out <- engram.StreamMessage{Kind: "two"} - - drainStreamMessages(out) - - select { - case msg := <-out: - t.Fatalf("expected buffered messages to be drained, got %+v", msg) - default: - } -} - -func TestDrainStreamMessagesDoesNotBlockOnOpenChannel(t *testing.T) { - out := make(chan engram.StreamMessage) - done := make(chan struct{}) - - go func() { - drainStreamMessages(out) - close(done) - }() - - select { - case <-done: - case <-time.After(100 * time.Millisecond): - t.Fatal("drainStreamMessages blocked on an open channel") - } -} - -func TestSendControlMessageRespectsCanceledContextWhenChannelFull(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - recvCh := make(chan controlMessage, 1) - recvCh <- controlMessage{response: nil} - cancel() - - done := make(chan bool, 1) - go func() { - done <- sendControlMessage(ctx, recvCh, controlMessage{err: context.Canceled}) - }() - - select { - case delivered := <-done: - if delivered { - t.Fatal("expected sendControlMessage to stop when context is canceled") - } - case <-time.After(100 * time.Millisecond): - t.Fatal("sendControlMessage blocked on a full channel after cancellation") - } -} - -func TestQueueControlRequestClosedChannelDoesNotPanic(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest) - close(controlRequests) - req := &transportpb.ControlRequest{CustomAction: "test"} - - done := make(chan struct{}, 1) - go func() { - queueControlRequest(context.Background(), controlRequests, req) - close(done) - }() - - select { - case <-done: - case <-time.After(100 * time.Millisecond): - t.Fatal("queueControlRequest blocked on closed channel") - } -} - -func TestQueueControlRequestCanceledContextSkipsSendWhenQueueAvailable(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest, 1) - req := &transportpb.ControlRequest{CustomAction: "test"} - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - queueControlRequest(ctx, controlRequests, req) - - select { - case got := <-controlRequests: - t.Fatalf("expected canceled context to skip send, got %+v", got) - default: - } -} - -func TestQueueControlRequestCanceledContextReturnsWhenQueueFull(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest, 1) - controlRequests <- &transportpb.ControlRequest{CustomAction: "existing"} - req := &transportpb.ControlRequest{CustomAction: "test"} - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - done := make(chan struct{}, 1) - go func() { - queueControlRequest(ctx, controlRequests, req) - close(done) - }() - - select { - case <-done: - case <-time.After(100 * time.Millisecond): - t.Fatal("queueControlRequest blocked with canceled context and full queue") - } - if got := len(controlRequests); got != 1 { - t.Fatalf("expected queue length 1, got %d", got) - } -} - -func TestQueueControlRequestFullQueueReturnsWithoutBlocking(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest, 1) - first := &transportpb.ControlRequest{CustomAction: "first"} - second := &transportpb.ControlRequest{CustomAction: "second"} - controlRequests <- first - - done := make(chan struct{}) - go func() { - queueControlRequest(context.Background(), controlRequests, second) - close(done) - }() - - select { - case <-done: - case <-time.After(100 * time.Millisecond): - t.Fatal("queueControlRequest blocked on a full queue with live context") - } - - got := <-controlRequests - if got.GetCustomAction() != "first" { - t.Fatalf("expected existing request to remain queued, got %q", got.GetCustomAction()) - } -} - -func TestQueueControlRequestCanceledContextStillWinsWhenQueueFull(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest, 1) - controlRequests <- &transportpb.ControlRequest{CustomAction: "existing"} - req := &transportpb.ControlRequest{CustomAction: "blocked"} - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - done := make(chan struct{}) - go func() { - queueControlRequest(ctx, controlRequests, req) - close(done) - }() - - select { - case <-done: - case <-time.After(100 * time.Millisecond): - t.Fatal("queueControlRequest blocked with canceled context and full queue") - } - - got := <-controlRequests - if got.GetCustomAction() != "existing" { - t.Fatalf("expected existing request to remain queued, got %q", got.GetCustomAction()) - } -} - -func TestQueueDownstreamDeliveryReceiptSkipsNonSequencedRequest(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest, 1) - opts := streamRuntimeOptions{controlRequests: controlRequests} - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "step", - Partition: "p0", - Sequence: 0, - }, - } - - queueDownstreamDeliveryReceipt(context.Background(), opts, req) - - select { - case got := <-controlRequests: - t.Fatalf("expected no receipt for non-sequenced request, got %+v", got) - default: - } -} - -func TestQueueDownstreamDeliveryReceiptSkipsRequestWithoutStreamID(t *testing.T) { - controlRequests := make(chan *transportpb.ControlRequest, 1) - opts := streamRuntimeOptions{controlRequests: controlRequests} - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - Sequence: 9, - Partition: "p0", - }, - } - - queueDownstreamDeliveryReceipt(context.Background(), opts, req) - - select { - case got := <-controlRequests: - t.Fatalf("expected no receipt for request without stream id, got %+v", got) - default: - } -} - -func TestEnqueueStreamMessageTimeoutDropsWhenChannelFull(t *testing.T) { - dst := make(chan engram.InboundMessage, 1) - dst <- engram.NewInboundMessage(engram.StreamMessage{Kind: "existing"}) - msg := engram.NewInboundMessage(engram.StreamMessage{Kind: "new"}) - - start := time.Now() - delivered, err := enqueueStreamMessage(context.Background(), dst, msg, 10*time.Millisecond) - if err != nil { - t.Fatalf("expected nil error when dropping on timeout, got %v", err) - } - if delivered { - t.Fatal("expected enqueue timeout drop to report non-delivery") - } - if elapsed := time.Since(start); elapsed > 100*time.Millisecond { - t.Fatalf("enqueueStreamMessage took too long to drop message: %s", elapsed) - } - if got := len(dst); got != 1 { - t.Fatalf("expected full queue to remain unchanged after drop, got len=%d", got) - } -} - -func TestEnqueueStreamMessageTimeoutRespectsCanceledContext(t *testing.T) { - dst := make(chan engram.InboundMessage, 1) - dst <- engram.NewInboundMessage(engram.StreamMessage{Kind: "occupied"}) - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - delivered, err := enqueueStreamMessage(ctx, dst, engram.NewInboundMessage(engram.StreamMessage{Kind: "new"}), 50*time.Millisecond) //nolint:lll - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled, got %v", err) - } - if delivered { - t.Fatal("expected canceled enqueue to report non-delivery") - } - if got := len(dst); got != 1 { - t.Fatalf("expected canceled enqueue to preserve existing queue item, got len=%d", got) - } -} - -func TestLossyEnqueueKeepsPendingPacketWithoutEmittingReceipt(t *testing.T) { - deduper := newPacketDeduper(defaultPacketDedupeEntries) - deduper.StartSession() - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "downstream-step", - Sequence: 7, - Partition: "p0", - }, - } - status, key, generation := deduper.Begin(req) //nolint:revive - if status != packetNew { - t.Fatalf("expected packetNew, got %v", status) - } - - receipts := make(chan *transportpb.ControlRequest, 1) - inbound := attachDownstreamProcessingReceipt( - context.Background(), - streamRuntimeOptions{controlRequests: receipts}, - engram.NewInboundMessage(engram.StreamMessage{Envelope: req.GetEnvelope()}), - req, - deduper, - key, - generation, - ) - - dst := make(chan engram.InboundMessage, 1) - dst <- engram.NewInboundMessage(engram.StreamMessage{Kind: "occupied"}) - delivered, err := enqueueStreamMessage(context.Background(), dst, inbound, 10*time.Millisecond) - if err != nil { - t.Fatalf("expected nil error for lossy drop, got %v", err) - } - if delivered { - t.Fatal("expected lossy drop to report non-delivery") - } - select { - case receipt := <-receipts: - t.Fatalf("expected no receipt emission on dropped message, got %+v", receipt) - default: - } - - statusAfterDrop, _, _ := deduper.Begin(req) - if statusAfterDrop != packetDuplicatePending { - t.Fatalf("expected dropped packet to remain pending at helper level, got %v", statusAfterDrop) - } -} - -func TestPublishRequestToStreamMessageBinaryPassthroughMirrorsSinglePayloadCopy(t *testing.T) { - src := []byte("binary") - req := &transportpb.PublishRequest{ - Frame: &transportpb.PublishRequest_Binary{ - Binary: &transportpb.BinaryFrame{ - Payload: src, - MimeType: "application/octet-stream", - TimestampMs: 11, - }, - }, - } - - msg, err := publishRequestToStreamMessage(req) - if err != nil { - t.Fatalf("publishRequestToStreamMessage() failed: %v", err) - } - if msg.Binary == nil { - t.Fatal("expected binary frame in decoded stream message") - } - if string(msg.Payload) != "binary" { //nolint:goconst - t.Fatalf("unexpected payload: %q", string(msg.Payload)) - } - if string(msg.Binary.Payload) != "binary" { - t.Fatalf("unexpected binary payload: %q", string(msg.Binary.Payload)) - } - - // Ensure decoded payload remains isolated from source request mutation. - src[0] = 'X' - if string(msg.Payload) != "binary" { - t.Fatalf("decoded payload should not alias source request payload, got %q", string(msg.Payload)) - } - - // Binary passthrough keeps payload and binary payload mirrored. - msg.Payload[0] = 'z' - if string(msg.Binary.Payload) != "zinary" { - t.Fatalf("expected mirrored payload/binary payload, got %q", string(msg.Binary.Payload)) - } -} - -func TestNewPacketDeduperDefaultsMaxEntriesWhenNonPositive(t *testing.T) { - deduper := newPacketDeduper(0) - if deduper == nil { - t.Fatal("expected non-nil deduper") - } - if deduper.maxEntries != defaultPacketDedupeEntries { - t.Fatalf("expected default max entries %d, got %d", defaultPacketDedupeEntries, deduper.maxEntries) - } - if deduper.pending == nil || deduper.completed == nil { - t.Fatal("expected pending/completed maps to be initialized") - } -} - -func TestPacketDeduperBeginRepeatedKeepsPendingStateBounded(t *testing.T) { - deduper := newPacketDeduper(defaultPacketDedupeEntries) - deduper.StartSession() - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "step-a", - Sequence: 11, - Partition: "p0", - }, - } - - for i := range 25 { - status, _, _ := deduper.Begin(req) //nolint:revive - if i == 0 && status != packetNew { - t.Fatalf("first Begin expected packetNew, got %v", status) - } - if i > 0 && status != packetDuplicatePending { - t.Fatalf("repeated Begin expected packetDuplicatePending, got %v", status) - } - } - - if got := len(deduper.pending); got != 1 { - t.Fatalf("expected pending state to stay bounded at 1 for repeated Begin, got %d", got) - } -} - -func TestPacketDeduperBeginReturnsOverflowWhenPendingCapacityExceeded(t *testing.T) { - deduper := newPacketDeduper(2) - deduper.StartSession() - - reqs := []*transportpb.PublishRequest{ - {Envelope: &transportpb.StreamEnvelope{StreamId: "step-a", Sequence: 1, Partition: "p0"}}, - {Envelope: &transportpb.StreamEnvelope{StreamId: "step-a", Sequence: 2, Partition: "p0"}}, - {Envelope: &transportpb.StreamEnvelope{StreamId: "step-a", Sequence: 3, Partition: "p0"}}, - } - - for i, req := range reqs[:2] { - status, _, _ := deduper.Begin(req) //nolint:revive - if status != packetNew { - t.Fatalf("begin %d expected packetNew, got %v", i, status) - } - } - - status, _, _ := deduper.Begin(reqs[2]) //nolint:revive - if status != packetPendingOverflow { - t.Fatalf("expected packetPendingOverflow, got %v", status) - } - if got := len(deduper.pending); got != 2 { - t.Fatalf("expected pending entries to remain capped at 2, got %d", got) - } - - status, _, _ = deduper.Begin(reqs[0]) - if status != packetDuplicatePending { - t.Fatalf("expected oldest in-flight packet to remain pending, got %v", status) - } -} - -func TestPacketDeduperStartSessionClearsPendingState(t *testing.T) { - deduper := newPacketDeduper(defaultPacketDedupeEntries) - deduper.StartSession() - - req1 := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "step-a", - Sequence: 1, - Partition: "p0", - }, - } - req2 := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "step-a", - Sequence: 2, - Partition: "p0", - }, - } - - if status, _, _ := deduper.Begin(req1); status != packetNew { //nolint:revive - t.Fatalf("expected req1 to be new, got %v", status) - } - if status, _, _ := deduper.Begin(req2); status != packetNew { //nolint:revive - t.Fatalf("expected req2 to be new, got %v", status) - } - if got := len(deduper.pending); got != 2 { - t.Fatalf("expected 2 pending entries before session restart, got %d", got) - } - - deduper.StartSession() - if got := len(deduper.pending); got != 0 { - t.Fatalf("expected pending state to be cleared on StartSession, got %d", got) - } - if status, _, _ := deduper.Begin(req1); status != packetNew { //nolint:revive - t.Fatalf("expected req1 to be new after pending cleanup, got %v", status) - } -} - -func TestPacketDeduperReleaseStaleGenerationDoesNotClearCurrentPending(t *testing.T) { - deduper := newPacketDeduper(defaultPacketDedupeEntries) - deduper.StartSession() - - req := &transportpb.PublishRequest{ - Envelope: &transportpb.StreamEnvelope{ - StreamId: "step-a", - Sequence: 3, - Partition: "p0", - }, - } - - _, key, staleGeneration := deduper.Begin(req) - deduper.StartSession() // increments generation and clears previous pending state - status, _, currentGeneration := deduper.Begin(req) //nolint:revive - if status != packetNew { - t.Fatalf("expected packetNew in new generation, got %v", status) - } - if currentGeneration == staleGeneration { - t.Fatal("expected generation to advance after StartSession") - } - - deduper.Release(key, staleGeneration) - statusAfterStaleRelease, _, _ := deduper.Begin(req) - if statusAfterStaleRelease != packetDuplicatePending { - t.Fatalf("expected current pending entry to survive stale-generation release, got %v", statusAfterStaleRelease) - } -} - -type noopStreamingEngram struct{} - -func (noopStreamingEngram) Init(context.Context, struct{}, *engram.Secrets) error { return nil } - -func (noopStreamingEngram) Stream(context.Context, <-chan engram.InboundMessage, chan<- engram.StreamMessage) error { - return nil -} - -type blockingStreamingEngram struct{} - -func (blockingStreamingEngram) Init(context.Context, struct{}, *engram.Secrets) error { return nil } - -func (blockingStreamingEngram) Stream(ctx context.Context, _ <-chan engram.InboundMessage, _ chan<- engram.StreamMessage) error { //nolint:lll - <-ctx.Done() - return ctx.Err() -} - -type singleMessageStreamingEngram struct { - msg engram.StreamMessage -} - -func (singleMessageStreamingEngram) Init(context.Context, struct{}, *engram.Secrets) error { - return nil -} - -func (e singleMessageStreamingEngram) Stream(ctx context.Context, - _ <-chan engram.InboundMessage, out chan<- engram.StreamMessage) error { - select { - case out <- e.msg: - case <-ctx.Done(): - return ctx.Err() - } - <-ctx.Done() - return ctx.Err() -} - -func TestCallSendWithTimeoutTracksTimedOutWorkerUntilReleased(t *testing.T) { - tracker := newTimedSendTracker() - blocked := make(chan struct{}) - release := make(chan struct{}) - - err := callSendWithTimeout(context.Background(), 10*time.Millisecond, nil, "data send", tracker, func() error { - close(blocked) - <-release - return nil - }) - if err == nil { - t.Fatal("expected timeout error") - } - <-blocked - - if tracker.Wait(10 * time.Millisecond) { - t.Fatal("expected timed send worker to remain tracked while blocked") - } - - close(release) - if !tracker.Wait(100 * time.Millisecond) { - t.Fatal("expected timed send worker to finish after release") - } -} - -func TestTimedSendTrackerWaitDoesNotAllocatePerWaiterGoroutine(t *testing.T) { - tracker := newTimedSendTracker() - release := make(chan struct{}) - started := make(chan struct{}) - - if err := startTimedSend(tracker, func() { - close(started) - <-release - }); err != nil { - t.Fatalf("startTimedSend: %v", err) - } - <-started - - // Establish a conservative baseline before issuing repeated timed waits. - runtime.GC() - time.Sleep(10 * time.Millisecond) - baseline := runtime.NumGoroutine() - - const waitCalls = 200 - for i := range waitCalls { - if tracker.Wait(1 * time.Millisecond) { - t.Fatalf("expected Wait call %d to time out while send remains blocked", i) - } - } - - afterWaits := runtime.NumGoroutine() - delta := afterWaits - baseline - // Guard against per-Wait goroutine allocation: repeated timed waits should not - // leave one blocked waiter goroutine per call while work is still pending. - if delta > 25 { - t.Fatalf("expected bounded waiter goroutines after repeated Wait calls, baseline=%d after=%d delta=%d", baseline, afterWaits, delta) //nolint:lll - } - - close(release) - if !tracker.Wait(100 * time.Millisecond) { - t.Fatal("expected tracker to drain after releasing blocked send") - } -} - -func TestWaitForTimedSendCleanupReturnsTimeoutError(t *testing.T) { - tracker := newTimedSendTracker() - release := make(chan struct{}) - started := make(chan struct{}) - - if err := startTimedSend(tracker, func() { - close(started) - <-release - }); err != nil { - t.Fatalf("startTimedSend: %v", err) - } - <-started - - err := waitForTimedSendCleanup(nil, tracker, 10*time.Millisecond) - if !errors.Is(err, errTimedSendCleanupTimeout) { - t.Fatalf("expected timed send cleanup timeout, got %v", err) - } - - close(release) - if !tracker.Wait(100 * time.Millisecond) { - t.Fatal("expected tracker to drain after releasing blocked send") - } -} - -func TestCallSendWithTimeoutHonorsCanceledContextWhenTimeoutDisabled(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - called := false - err := callSendWithTimeout(ctx, 0, nil, "data send", nil, func() error { - called = true - return nil - }) - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled, got %v", err) - } - if called { - t.Fatal("expected fn not to be called when context is already canceled") - } -} - -func TestCallSendWithTimeoutTimeoutDisabledReturnsOnCancellationAndDrainsTrackedWorker(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - tracker := newTimedSendTracker() - started := make(chan struct{}) - release := make(chan struct{}) - - errCh := make(chan error, 1) - go func() { - errCh <- callSendWithTimeout(ctx, 0, nil, "data send", tracker, func() error { - close(started) - <-release - return nil - }) - }() - - select { - case <-started: - case <-time.After(100 * time.Millisecond): - t.Fatal("expected timeout-disabled send worker to start") - } - - if tracker.Wait(10 * time.Millisecond) { - t.Fatal("expected tracked timeout-disabled send worker to remain pending while blocked") - } - - cancel() - - select { - case err := <-errCh: - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled, got %v", err) - } - case <-time.After(100 * time.Millisecond): - t.Fatal("expected callSendWithTimeout to return on context cancellation") - } - - if tracker.Wait(10 * time.Millisecond) { - t.Fatal("expected tracked worker to remain pending until send is released") - } - - close(release) - - if !tracker.Wait(100 * time.Millisecond) { - t.Fatal("expected tracked timeout-disabled send worker to drain after release") - } -} - -func TestCallSendWithTimeoutHonorsCanceledContextWhenTimeoutEnabled(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - called := false - err := callSendWithTimeout(ctx, 100*time.Millisecond, nil, "data send", nil, func() error { - called = true - return nil - }) - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled, got %v", err) - } - if called { - t.Fatal("expected fn not to be called when context is already canceled") - } -} - -func TestRunTransportConnectorStreamDoesNotRetryNonRetriableErrors(t *testing.T) { - attempts := 0 - sleeps := 0 - sessionErr := status.Error(codes.InvalidArgument, "bad connector config") - - err := runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(map[string]string{contracts.GRPCReconnectMaxRetriesEnv: "3"}), - func(context.Context, string, bindingReference, engram.StreamingEngram[struct{}], envResolver) error { - attempts++ - return sessionErr - }, - func(context.Context, time.Duration) error { - sleeps++ - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - if !errors.Is(err, sessionErr) { - t.Fatalf("expected original non-retriable error, got %v", err) - } - if attempts != 1 { - t.Fatalf("expected one attempt, got %d", attempts) - } - if sleeps != 0 { - t.Fatalf("expected no reconnect sleeps, got %d", sleeps) - } -} - -func TestIsRetriableTransportSessionError_TimedSendCleanupTimeoutNotRetriable(t *testing.T) { - cleanupTimeoutErr := fmt.Errorf("%w: timeout=%s", errTimedSendCleanupTimeout, time.Second) - if isRetriableTransportSessionError(cleanupTimeoutErr) { - t.Fatalf("expected timed send cleanup timeout to be non-retriable, got retriable") - } -} - -func TestRunTransportConnectorStreamDoesNotRetryTimedSendCleanupTimeout(t *testing.T) { - attempts := 0 - sleeps := 0 - sessionErr := errors.Join(errors.New("temporary transport failure"), errTimedSendCleanupTimeout) - - err := runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(map[string]string{contracts.GRPCReconnectMaxRetriesEnv: "3"}), - func(context.Context, string, bindingReference, engram.StreamingEngram[struct{}], envResolver) error { - attempts++ - return sessionErr - }, - func(context.Context, time.Duration) error { - sleeps++ - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - if !errors.Is(err, errTimedSendCleanupTimeout) { - t.Fatalf("expected timed send cleanup timeout, got %v", err) - } - if attempts != 1 { - t.Fatalf("expected one attempt for cleanup-timeout error, got %d", attempts) - } - if sleeps != 0 { - t.Fatalf("expected no reconnect sleeps on cleanup-timeout error, got %d", sleeps) - } -} - -func TestRunTransportConnectorStreamDoesNotRetryControlStartupHandshakeTimeout(t *testing.T) { - attempts := 0 - sleeps := 0 - sessionErr := fmt.Errorf("%w: timeout=%s", errControlStartupHandshakeTimeout, time.Second) - - err := runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(map[string]string{contracts.GRPCReconnectMaxRetriesEnv: "3"}), - func(context.Context, string, bindingReference, engram.StreamingEngram[struct{}], envResolver) error { - attempts++ - return sessionErr - }, - func(context.Context, time.Duration) error { - sleeps++ - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - if !errors.Is(err, errControlStartupHandshakeTimeout) { - t.Fatalf("expected control startup handshake timeout, got %v", err) - } - if attempts != 1 { - t.Fatalf("expected one attempt for startup-handshake error, got %d", attempts) - } - if sleeps != 0 { - t.Fatalf("expected no reconnect sleeps on startup-handshake error, got %d", sleeps) - } -} - -func TestRunTransportConnectorStreamDoesNotRetryBlockedDataSendCleanupTimeout(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - sendStarted := make(chan struct{}) - sendRelease := make(chan struct{}) - client := &fakeSessionTransportClient{ - dataStream: &blockingDataSessionStream{ - sendStarted: sendStarted, - sendBlock: sendRelease, - }, - controlStream: &blockingControlSessionStream{}, - } - attempts := 0 - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - attempts++ - return &TransportConnectorClient{client: client}, nil - } - - sleeps := 0 - errCh := make(chan error, 1) - go func() { - errCh <- runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - singleMessageStreamingEngram{msg: engram.StreamMessage{Payload: []byte(`{"ok":true}`)}}, - newEnvResolver(map[string]string{ - contracts.GRPCMessageTimeoutEnv: "15ms", - contracts.GRPCReconnectMaxRetriesEnv: "3", - }), - nil, - func(context.Context, time.Duration) error { - sleeps++ - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - }() - - select { - case <-sendStarted: - case <-time.After(200 * time.Millisecond): - t.Fatal("expected data send loop to enter blocked Send path") - } - - var err error - select { - case err = <-errCh: - case <-time.After(5 * time.Second): - t.Fatal("expected blocked send cleanup timeout to terminate outer reconnect loop") - } - close(sendRelease) - - if !errors.Is(err, errTimedSendCleanupTimeout) { - t.Fatalf("expected timed send cleanup timeout, got %v", err) - } - if attempts != 1 { - t.Fatalf("expected one connector dial attempt, got %d", attempts) - } - if sleeps != 0 { - t.Fatalf("expected no reconnect sleeps after blocked send cleanup timeout, got %d", sleeps) - } -} - -func TestRunTransportConnectorStreamRetriesTransientErrorsWithBackoff(t *testing.T) { - attempts := 0 - var sleeps []time.Duration - - err := runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(map[string]string{ - contracts.GRPCReconnectBaseBackoffEnv: "100ms", - contracts.GRPCReconnectMaxBackoffEnv: "200ms", - contracts.GRPCReconnectMaxRetriesEnv: "3", - }), - func(context.Context, string, bindingReference, engram.StreamingEngram[struct{}], envResolver) error { - attempts++ - if attempts < 3 { - return errors.New("temporary transport failure") - } - return nil - }, - func(_ context.Context, d time.Duration) error { - sleeps = append(sleeps, d) - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - if err != nil { - t.Fatalf("expected retries to recover, got %v", err) - } - if attempts != 3 { - t.Fatalf("expected 3 attempts, got %d", attempts) - } - if len(sleeps) != 2 { - t.Fatalf("expected 2 reconnect sleeps, got %d", len(sleeps)) - } - if sleeps[0] != 100*time.Millisecond || sleeps[1] != 200*time.Millisecond { - t.Fatalf("unexpected reconnect sleeps: %#v", sleeps) - } -} - -func TestRunTransportConnectorStreamReusesPacketDeduperAcrossRetries(t *testing.T) { - attempts := 0 - var dedupers []*packetDeduper - - err := runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(map[string]string{contracts.GRPCReconnectMaxRetriesEnv: "2"}), - func(ctx context.Context, _ string, _ bindingReference, _ engram.StreamingEngram[struct{}], _ envResolver) error { - attempts++ - deduper := packetDeduperFromContext(ctx) - if deduper == nil { - t.Fatal("expected packet deduper in transport session context") - } - dedupers = append(dedupers, deduper) - if attempts < 2 { - return errors.New("temporary transport failure") - } - return nil - }, - func(context.Context, time.Duration) error { - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - if err != nil { - t.Fatalf("expected retries to recover, got %v", err) - } - if attempts != 2 { - t.Fatalf("expected 2 attempts, got %d", attempts) - } - if len(dedupers) != 2 { - t.Fatalf("expected 2 deduper captures, got %d", len(dedupers)) - } - if dedupers[0] != dedupers[1] { - t.Fatal("expected reconnect attempts to reuse the same packet deduper") - } -} - -func TestRunTransportConnectorStreamResetsControlRequestQueueAcrossRetries(t *testing.T) { - attempts := 0 - var queues []chan *transportpb.ControlRequest - - err := runTransportConnectorStreamWithDeps( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(map[string]string{contracts.GRPCReconnectMaxRetriesEnv: "2"}), - func(ctx context.Context, _ string, _ bindingReference, _ engram.StreamingEngram[struct{}], _ envResolver) error { - attempts++ - queue := controlRequestQueueFromContext(ctx) - if queue == nil { - t.Fatal("expected control request queue in transport session context") - } - queues = append(queues, queue) - if attempts == 1 { - queue <- &transportpb.ControlRequest{CustomAction: downstreamDeliveryReceiptType} - return errors.New("temporary transport failure") - } - select { - case req := <-queue: - t.Fatalf("unexpected stale control request carried across retry: %+v", req) - default: - } - return nil - }, - func(context.Context, time.Duration) error { - return nil - }, - func(wait time.Duration, max time.Duration) time.Duration { - return wait - }, - ) - if err != nil { - t.Fatalf("expected retries to recover, got %v", err) - } - if attempts != 2 { - t.Fatalf("expected 2 attempts, got %d", attempts) - } - if len(queues) != 2 { - t.Fatalf("expected 2 queue captures, got %d", len(queues)) - } - if queues[0] == queues[1] { - t.Fatal("expected reconnect attempts to use a fresh control request queue") - } -} - -func TestRunTransportSessionReturnsRetriableEOFWhenDataRecvClosesUnexpectedly(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - client := &fakeSessionTransportClient{ - dataStream: &fakeDataSendLoopStream{}, - controlStream: &blockingControlSessionStream{}, - } - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - errCh := make(chan error, 1) - go func() { - errCh <- runTransportSession( - context.Background(), - "connector:9000", - bindingReference{}, - blockingStreamingEngram{}, - newEnvResolver(nil), - ) - }() - - select { - case err := <-errCh: - if !errors.Is(err, io.EOF) { - t.Fatalf("expected data recv EOF session error, got %v", err) - } - if !isRetriableTransportSessionError(err) { - t.Fatalf("expected unexpected data recv EOF to be retriable, got %v", err) - } - case <-time.After(500 * time.Millisecond): - t.Fatal("expected session to terminate when data recv stream closes unexpectedly") - } -} - -func TestRunTransportSessionReturnsNilWhenEngramStreamEndsGracefully(t *testing.T) { - prevDial := connectorDial - t.Cleanup(func() { - connectorDial = prevDial - }) - - client := &fakeSessionTransportClient{ - dataStream: &blockingDataSessionStream{}, - controlStream: &blockingControlSessionStream{}, - } - connectorDial = func(context.Context, string, envResolver, ...grpc.DialOption) (*TransportConnectorClient, error) { - return &TransportConnectorClient{client: client}, nil - } - - errCh := make(chan error, 1) - go func() { - errCh <- runTransportSession( - context.Background(), - "connector:9000", - bindingReference{}, - noopStreamingEngram{}, - newEnvResolver(nil), - ) - }() - - select { - case err := <-errCh: - if err != nil { - t.Fatalf("expected graceful stream completion to end session cleanly, got %v", err) - } - case <-time.After(500 * time.Millisecond): - t.Fatal("expected session to terminate when engram stream exits cleanly") - } -} - -func TestJitterReconnectDelayClampsToMax(t *testing.T) { - prev := reconnectJitterFloat64 - reconnectJitterFloat64 = func() float64 { return 1.0 } - t.Cleanup(func() { - reconnectJitterFloat64 = prev - }) - - delay := jitterReconnectDelay(100*time.Millisecond, 110*time.Millisecond) - if delay != 110*time.Millisecond { - t.Fatalf("expected jittered delay to clamp to max, got %s", delay) - } -} - -type fakeDataSendLoopStream struct { - closeSendErr error - closeSendCalls int -} - -func (f *fakeDataSendLoopStream) Send(*transportpb.DataRequest) error { return nil } -func (f *fakeDataSendLoopStream) Recv() (*transportpb.DataResponse, error) { - return nil, io.EOF -} -func (f *fakeDataSendLoopStream) CloseSend() error { - f.closeSendCalls++ - return f.closeSendErr -} -func (f *fakeDataSendLoopStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } -func (f *fakeDataSendLoopStream) Trailer() metadata.MD { return metadata.MD{} } -func (f *fakeDataSendLoopStream) Context() context.Context { return context.Background() } -func (f *fakeDataSendLoopStream) SendMsg(any) error { return nil } -func (f *fakeDataSendLoopStream) RecvMsg(any) error { return nil } - -type fakeControlLoopStream struct { - closeSendErr error - closeSendCalls int - sendErr error - recvErr error - sendStarted chan struct{} - sendBlock <-chan struct{} - recvWait <-chan struct{} - sendOnce sync.Once -} - -func (f *fakeControlLoopStream) Send(*transportpb.ControlRequest) error { - if f.sendStarted != nil { - f.sendOnce.Do(func() { close(f.sendStarted) }) - } - if f.sendBlock != nil { - <-f.sendBlock - } - return f.sendErr -} -func (f *fakeControlLoopStream) Recv() (*transportpb.ControlResponse, error) { - if f.recvWait != nil { - <-f.recvWait - } - if f.recvErr != nil { - return nil, f.recvErr - } - return nil, io.EOF -} -func (f *fakeControlLoopStream) CloseSend() error { - f.closeSendCalls++ - return f.closeSendErr -} -func (f *fakeControlLoopStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } -func (f *fakeControlLoopStream) Trailer() metadata.MD { return metadata.MD{} } -func (f *fakeControlLoopStream) Context() context.Context { return context.Background() } -func (f *fakeControlLoopStream) SendMsg(any) error { return nil } -func (f *fakeControlLoopStream) RecvMsg(any) error { return nil } - -type fakeControlLoopClient struct { - stream transportpb.TransportConnectorService_ControlClient -} - -func (f *fakeControlLoopClient) Data(context.Context, ...grpc.CallOption) (transportpb.TransportConnectorService_DataClient, error) { //nolint:lll - return nil, errors.New("unexpected Data call in control loop test") -} - -func (f *fakeControlLoopClient) Control(context.Context, ...grpc.CallOption) (transportpb.TransportConnectorService_ControlClient, error) { //nolint:lll - return f.stream, nil -} - -func (f *fakeControlLoopClient) HubPush(context.Context, ...grpc.CallOption) (transportpb.TransportConnectorService_HubPushClient, error) { //nolint:lll - return nil, errors.New("unexpected HubPush call in control loop test") -} - -type fakeSessionTransportClient struct { - dataStream transportpb.TransportConnectorService_DataClient - controlStream transportpb.TransportConnectorService_ControlClient -} - -func (f *fakeSessionTransportClient) Data(ctx context.Context, _ ...grpc.CallOption) (transportpb.TransportConnectorService_DataClient, error) { //nolint:lll - if f.dataStream == nil { - return nil, errors.New("missing data stream") - } - if s, ok := f.dataStream.(*blockingDataSessionStream); ok { - s.ctx = ctx - } - return f.dataStream, nil -} - -func (f *fakeSessionTransportClient) Control( - ctx context.Context, - _ ...grpc.CallOption, -) (transportpb.TransportConnectorService_ControlClient, error) { - if f.controlStream == nil { - return nil, errors.New("missing control stream") - } - if s, ok := f.controlStream.(*blockingControlSessionStream); ok { - s.ctx = ctx - } - return f.controlStream, nil -} - -func (f *fakeSessionTransportClient) HubPush(context.Context, ...grpc.CallOption) (transportpb.TransportConnectorService_HubPushClient, error) { //nolint:lll - return nil, errors.New("unexpected HubPush call in session test") -} - -type blockingControlSessionStream struct { - ctx context.Context - readyOnce sync.Once -} - -func (s *blockingControlSessionStream) Send(*transportpb.ControlRequest) error { return nil } -func (s *blockingControlSessionStream) Recv() (*transportpb.ControlResponse, error) { - var ready *transportpb.ControlResponse - s.readyOnce.Do(func() { - ready = &transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_CONNECTOR_READY, - Metadata: map[string]string{ - coretransport.StartupCapabilitiesMetadataKey: coretransport.StartupCapabilitiesNone, - }, - } - }) - if ready != nil { - return ready, nil - } - if s.ctx == nil { - return nil, io.EOF - } - <-s.ctx.Done() - return nil, s.ctx.Err() -} -func (s *blockingControlSessionStream) CloseSend() error { return nil } -func (s *blockingControlSessionStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } -func (s *blockingControlSessionStream) Trailer() metadata.MD { return metadata.MD{} } -func (s *blockingControlSessionStream) Context() context.Context { - if s.ctx != nil { - return s.ctx - } - return context.Background() -} -func (s *blockingControlSessionStream) SendMsg(any) error { return nil } -func (s *blockingControlSessionStream) RecvMsg(any) error { return nil } - -type blockingDataSessionStream struct { - ctx context.Context - sendStarted chan struct{} - sendBlock <-chan struct{} - sendOnce sync.Once -} - -func (s *blockingDataSessionStream) Send(*transportpb.DataRequest) error { - if s.sendStarted != nil { - s.sendOnce.Do(func() { close(s.sendStarted) }) - } - if s.sendBlock != nil { - <-s.sendBlock - } - return nil -} -func (s *blockingDataSessionStream) Recv() (*transportpb.DataResponse, error) { - if s.ctx == nil { - return nil, io.EOF - } - <-s.ctx.Done() - return nil, s.ctx.Err() -} -func (s *blockingDataSessionStream) CloseSend() error { return nil } -func (s *blockingDataSessionStream) Header() (metadata.MD, error) { return metadata.MD{}, nil } -func (s *blockingDataSessionStream) Trailer() metadata.MD { return metadata.MD{} } -func (s *blockingDataSessionStream) Context() context.Context { - if s.ctx != nil { - return s.ctx - } - return context.Background() -} -func (s *blockingDataSessionStream) SendMsg(any) error { return nil } -func (s *blockingDataSessionStream) RecvMsg(any) error { return nil } - -func TestConnectorDataSendLoopCanceledExitAttemptsCloseSendButReturnsContextCanceled(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - stream := &fakeDataSendLoopStream{closeSendErr: errors.New("close send failed")} - err := connectorDataSendLoop(ctx, stream, make(chan engram.StreamMessage), streamRuntimeOptions{}) - - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled on canceled teardown, got %v", err) - } - if stream.closeSendCalls != 1 { - t.Fatalf("expected CloseSend to be attempted once on canceled teardown, got %d", stream.closeSendCalls) - } - if strings.Contains(err.Error(), "close send failed") { - t.Fatalf("expected canceled error to remain primary, got %v", err) - } -} - -func TestConnectorControlLoopCanceledExitAttemptsCloseSendButReturnsContextCanceled(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cancel() - - stream := &fakeControlLoopStream{closeSendErr: errors.New("close send failed")} - client := &fakeControlLoopClient{stream: stream} - err := connectorControlLoop( - ctx, - client, - bindingReference{}, - defaultControlDirectiveHandler{}, - streamRuntimeOptions{ - controlRequests: make(chan *transportpb.ControlRequest), - }, - ) - - if !errors.Is(err, context.Canceled) { - t.Fatalf("expected context canceled on canceled teardown, got %v", err) - } - if stream.closeSendCalls != 1 { - t.Fatalf("expected CloseSend to be attempted once on canceled teardown, got %d", stream.closeSendCalls) - } - if strings.Contains(err.Error(), "close send failed") { - t.Fatalf("expected canceled error to remain primary, got %v", err) - } -} - -func TestConnectorControlLoopEOFReturnsRetriableError(t *testing.T) { - stream := &fakeControlLoopStream{} - client := &fakeControlLoopClient{stream: stream} - err := connectorControlLoop( - context.Background(), - client, - bindingReference{}, - defaultControlDirectiveHandler{}, - streamRuntimeOptions{ - controlRequests: make(chan *transportpb.ControlRequest), - }, - ) - if !errors.Is(err, io.EOF) { - t.Fatalf("expected EOF-wrapped control loop error, got %v", err) - } - if !isRetriableTransportSessionError(err) { - t.Fatalf("expected control stream EOF to be retriable, got %v", err) - } -} - -func TestConnectorControlLoopInternalSendTimeoutIsTrackedAndDrainsAfterRelease(t *testing.T) { - sendTracker := newTimedSendTracker() - sendStarted := make(chan struct{}) - sendRelease := make(chan struct{}) - stream := &fakeControlLoopStream{ - sendStarted: sendStarted, - sendBlock: sendRelease, - recvWait: sendStarted, // keep recv side idle until send path is entered - } - client := &fakeControlLoopClient{stream: stream} - - ctx := t.Context() - - controlRequests := make(chan *transportpb.ControlRequest, 1) - controlRequests <- &transportpb.ControlRequest{CustomAction: "blocked-send"} - - errCh := make(chan error, 1) - go func() { - errCh <- connectorControlLoop( - ctx, - client, - bindingReference{}, - defaultControlDirectiveHandler{}, - streamRuntimeOptions{ - controlRequests: controlRequests, - messageTimeout: 15 * time.Millisecond, - controlHeartbeatInterval: time.Hour, - sendTracker: sendTracker, - }, - ) - }() - - select { - case <-sendStarted: - case <-time.After(200 * time.Millisecond): - t.Fatal("expected control loop to start blocked internal send") - } - - if err := waitForTimedSendCleanup(nil, sendTracker, 10*time.Millisecond); !errors.Is(err, errTimedSendCleanupTimeout) { - t.Fatalf("expected timed send cleanup timeout while control send is blocked, got %v", err) - } - - select { - case err := <-errCh: - if !strings.Contains(err.Error(), "control stream send failed") || !strings.Contains(err.Error(), "timed out") { - t.Fatalf("expected control send failure, got %v", err) - } - case <-time.After(500 * time.Millisecond): - t.Fatal("expected connectorControlLoop to return timeout while send worker remains blocked") - } - - close(sendRelease) - if err := waitForTimedSendCleanup(nil, sendTracker, 200*time.Millisecond); err != nil { - t.Fatalf("expected timed send tracker to drain after release, got %v", err) - } -} - -func TestProcessControlMessageSendTimeoutIsTrackedAndDrainsAfterRelease(t *testing.T) { - sendTracker := newTimedSendTracker() - sendStarted := make(chan struct{}) - sendRelease := make(chan struct{}) - stream := &fakeControlLoopStream{ - sendStarted: sendStarted, - sendBlock: sendRelease, - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errCh := make(chan error, 1) - go func() { - errCh <- processControlMessage( - ctx, - stream, - defaultControlDirectiveHandler{}, - controlMessage{ - response: &transportpb.ControlResponse{ - Action: transportpb.ControlAction_CONTROL_ACTION_NOOP, - }, - }, - 15*time.Millisecond, - cancel, - sendTracker, - ) - }() - - select { - case <-sendStarted: - case <-time.After(200 * time.Millisecond): - t.Fatal("expected processControlMessage to start blocked response send") - } - - if err := waitForTimedSendCleanup(nil, sendTracker, 10*time.Millisecond); !errors.Is(err, errTimedSendCleanupTimeout) { - t.Fatalf("expected timed send cleanup timeout while control response send is blocked, got %v", err) - } - - select { - case err := <-errCh: - if !strings.Contains(err.Error(), "control stream send failed") || !strings.Contains(err.Error(), "timed out") { - t.Fatalf("expected timed-out control response send failure, got %v", err) - } - case <-time.After(500 * time.Millisecond): - t.Fatal("expected processControlMessage to return timeout while send worker remains blocked") - } - - close(sendRelease) - - if err := waitForTimedSendCleanup(nil, sendTracker, 200*time.Millisecond); err != nil { - t.Fatalf("expected timed send tracker to drain after release, got %v", err) - } -} - -func TestSendControlHeartbeatTimeoutIsTrackedAndDrainsAfterRelease(t *testing.T) { - sendTracker := newTimedSendTracker() - sendStarted := make(chan struct{}) - sendRelease := make(chan struct{}) - stream := &fakeControlLoopStream{ - sendStarted: sendStarted, - sendBlock: sendRelease, - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - errCh := make(chan error, 1) - go func() { - errCh <- sendControlHeartbeat(ctx, stream, 15*time.Millisecond, cancel, sendTracker) - }() - - select { - case <-sendStarted: - case <-time.After(200 * time.Millisecond): - t.Fatal("expected sendControlHeartbeat to start blocked heartbeat send") - } - - if err := waitForTimedSendCleanup(nil, sendTracker, 10*time.Millisecond); !errors.Is(err, errTimedSendCleanupTimeout) { - t.Fatalf("expected timed send cleanup timeout while control heartbeat is blocked, got %v", err) - } - - select { - case err := <-errCh: - if !strings.Contains(err.Error(), "control stream heartbeat failed") || !strings.Contains(err.Error(), "timed out") { - t.Fatalf("expected timed-out control heartbeat send failure, got %v", err) - } - case <-time.After(500 * time.Millisecond): - t.Fatal("expected sendControlHeartbeat to return timeout while send worker remains blocked") - } - - close(sendRelease) - - if err := waitForTimedSendCleanup(nil, sendTracker, 200*time.Millisecond); err != nil { - t.Fatalf("expected timed send tracker to drain after release, got %v", err) - } -} diff --git a/stream_trace.go b/stream_trace.go deleted file mode 100644 index 1520c3b..0000000 --- a/stream_trace.go +++ /dev/null @@ -1,30 +0,0 @@ -package sdk - -import ( - "context" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/bubu-sdk-go/pkg/observability" - "go.opentelemetry.io/otel/propagation" -) - -func injectTraceContext(ctx context.Context, msg *engram.StreamMessage) { - if msg == nil || !observability.TracePropagationEnabled() { - return - } - metadata := cloneStringMap(msg.Metadata) - if metadata == nil { - metadata = make(map[string]string, 2) - } - observability.Propagator().Inject(ctx, propagation.MapCarrier(metadata)) - msg.Metadata = metadata -} - -// ExtractTraceContext restores tracing context from a StreamMessage's metadata so Engrams can -// start child spans that are linked to upstream steps. -func ExtractTraceContext(ctx context.Context, msg *engram.StreamMessage) context.Context { - if msg == nil || len(msg.Metadata) == 0 || !observability.TracePropagationEnabled() { - return ctx - } - return observability.Propagator().Extract(ctx, propagation.MapCarrier(msg.Metadata)) -} diff --git a/stream_trace_test.go b/stream_trace_test.go deleted file mode 100644 index 3d2a65f..0000000 --- a/stream_trace_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package sdk - -import ( - "context" - "fmt" - "testing" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/core/contracts" - "go.opentelemetry.io/otel/trace" -) - -func TestExtractTraceContext(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "true") - traceIDHex := "0af7651916cd43dd8448eb211c80319c" - spanIDHex := "b9c7c989f97918e1" - msg := &engram.StreamMessage{ - Metadata: map[string]string{ - "traceparent": fmt.Sprintf("00-%s-%s-01", traceIDHex, spanIDHex), - }, - } - - ctx := ExtractTraceContext(context.Background(), msg) - sc := trace.SpanContextFromContext(ctx) - if !sc.IsValid() { - t.Fatalf("expected valid span context from metadata") - } - if sc.TraceID().String() != traceIDHex { - t.Fatalf("expected traceID %s, got %s", traceIDHex, sc.TraceID().String()) - } - if sc.SpanID().String() != spanIDHex { - t.Fatalf("expected spanID %s, got %s", spanIDHex, sc.SpanID().String()) - } -} - -func TestExtractTraceContextNoMetadata(t *testing.T) { - t.Setenv(contracts.TracePropagationEnv, "true") - baseCtx := context.Background() - ctx := ExtractTraceContext(baseCtx, &engram.StreamMessage{}) - if ctx != baseCtx { - t.Fatalf("expected context to be unchanged when metadata missing") - } -} diff --git a/structured_error.go b/structured_error.go deleted file mode 100644 index d7479a0..0000000 --- a/structured_error.go +++ /dev/null @@ -1,143 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "encoding/json" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - k8sruntime "k8s.io/apimachinery/pkg/runtime" -) - -// StructuredErrorProvider allows errors to supply a versioned StructuredError payload. -type StructuredErrorProvider interface { - StructuredError() runsv1alpha1.StructuredError -} - -type structuredError struct { - structured runsv1alpha1.StructuredError - cause error -} - -// StructuredError returns the versioned error payload to attach to StepRun.status.error. -func (e *structuredError) StructuredError() runsv1alpha1.StructuredError { - return e.structured -} - -// Error implements the error interface. -func (e *structuredError) Error() string { - if e == nil { - return "engram failed" - } - if e.structured.Message != "" { - return e.structured.Message - } - if e.cause != nil { - return e.cause.Error() - } - return "engram failed" -} - -// Unwrap exposes the underlying cause. -func (e *structuredError) Unwrap() error { - if e == nil { - return nil - } - return e.cause -} - -// StructuredErrorOption mutates a structured error before it is returned. -type StructuredErrorOption func(*structuredError) - -// WithStructuredErrorRetryable annotates the error as retryable/terminal. -func WithStructuredErrorRetryable(retryable bool) StructuredErrorOption { - return func(e *structuredError) { - e.structured.Retryable = &retryable - } -} - -// WithStructuredErrorExitClass sets the desired exit class ("retry", "terminal", etc.). -func WithStructuredErrorExitClass(exitClass enums.ExitClass) StructuredErrorOption { - return func(e *structuredError) { - e.structured.ExitClass = runsv1alpha1.StructuredErrorExitClass(exitClass) - } -} - -// WithStructuredErrorCode sets a component-specific error code. -func WithStructuredErrorCode(code string) StructuredErrorOption { - return func(e *structuredError) { - e.structured.Code = code - } -} - -// WithStructuredErrorDetails attaches structured metadata for diagnostics. -func WithStructuredErrorDetails(details map[string]any) StructuredErrorOption { - return func(e *structuredError) { - if details == nil { - e.structured.Details = nil - return - } - raw, err := json.Marshal(details) - if err != nil { - e.structured.Details = fallbackStructuredErrorDetails(details, err) - return - } - e.structured.Details = &k8sruntime.RawExtension{Raw: raw} - } -} - -func fallbackStructuredErrorDetails(details map[string]any, marshalErr error) *k8sruntime.RawExtension { - fallback := map[string]any{ - "unserializable": true, - "marshalError": marshalErr.Error(), - } - kind, summary := signalTypeSummary(details) - fallback["type"] = kind - if len(summary) > 0 { - fallback["details"] = summary - } - raw, err := json.Marshal(fallback) - if err != nil { - return nil - } - return &k8sruntime.RawExtension{Raw: raw} -} - -// WithStructuredErrorCause preserves the underlying error for wrapping/unwrapping. -func WithStructuredErrorCause(cause error) StructuredErrorOption { - return func(e *structuredError) { - e.cause = cause - } -} - -// NewStructuredError returns an error that carries a StructuredError payload. -func NewStructuredError(typ runsv1alpha1.StructuredErrorType, message string, opts ...StructuredErrorOption) error { - serr := &structuredError{ - structured: runsv1alpha1.StructuredError{ - Version: runsv1alpha1.StructuredErrorVersionV1, - Type: typ, - Message: message, - }, - } - for _, opt := range opts { - if opt != nil { - opt(serr) - } - } - return serr -} diff --git a/structured_error_test.go b/structured_error_test.go deleted file mode 100644 index ce692c8..0000000 --- a/structured_error_test.go +++ /dev/null @@ -1,288 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package sdk - -import ( - "encoding/json" - "errors" - "fmt" - "strings" - "testing" - "time" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bobrapet/pkg/enums" - "github.com/bubustack/bubu-sdk-go/runtime" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func requireStructuredStatusError(t *testing.T, status *runsv1alpha1.StepRunStatus) runsv1alpha1.StructuredError { - t.Helper() - require.NotNil(t, status) - require.NotNil(t, status.Error) - return *status.Error -} - -func TestAppendStructuredError_Succeeded(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{} - appendStructuredError(status, enums.PhaseSucceeded, false, nil) - assert.Nil(t, status.Error, "no error should be set on success") -} - -func TestAppendStructuredError_NilError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{} - appendStructuredError(status, enums.PhaseFailed, false, nil) - assert.Nil(t, status.Error, "no error should be set when finalErr is nil") -} - -func TestAppendStructuredError_ExecutionError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ - ExitCode: 1, - ExitClass: enums.ExitClassTerminal, - } - appendStructuredError(status, enums.PhaseFailed, false, fmt.Errorf("engram process returned non-zero")) - - errObj := requireStructuredStatusError(t, status) - - assert.Equal(t, runsv1alpha1.StructuredErrorVersionV1, errObj.Version) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeExecution, errObj.Type) - assert.Equal(t, "engram process returned non-zero", errObj.Message) - require.NotNil(t, errObj.ExitCode) - assert.Equal(t, int32(1), *errObj.ExitCode) - assert.Equal(t, runsv1alpha1.StructuredErrorExitClass(enums.ExitClassTerminal), errObj.ExitClass) - require.NotNil(t, errObj.Retryable) - assert.False(t, *errObj.Retryable) -} - -func TestAppendStructuredError_TimeoutError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ - ExitCode: 124, - ExitClass: enums.ExitClassRetry, - } - appendStructuredError(status, enums.PhaseTimeout, true, fmt.Errorf("step exceeded 30s timeout")) - - errObj := requireStructuredStatusError(t, status) - - assert.Equal(t, runsv1alpha1.StructuredErrorVersionV1, errObj.Version) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeTimeout, errObj.Type) - assert.Contains(t, errObj.Message, "30s") - require.NotNil(t, errObj.ExitCode) - assert.Equal(t, int32(124), *errObj.ExitCode) - assert.Equal(t, runsv1alpha1.StructuredErrorExitClass(enums.ExitClassRetry), errObj.ExitClass) - require.NotNil(t, errObj.Retryable) - assert.True(t, *errObj.Retryable) -} - -func TestAppendStructuredError_StorageError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ExitCode: 1, ExitClass: enums.ExitClassTerminal} - appendStructuredError(status, enums.PhaseFailed, false, fmt.Errorf("failed to dehydrate output: storage backend unavailable")) //nolint:lll - - errObj := requireStructuredStatusError(t, status) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeStorage, errObj.Type) -} - -func TestAppendStructuredError_ValidationError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ExitCode: 1, ExitClass: enums.ExitClassTerminal} - appendStructuredError(status, enums.PhaseFailed, false, fmt.Errorf("input schema validation failed")) - - errObj := requireStructuredStatusError(t, status) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeValidation, errObj.Type) -} - -func TestAppendStructuredError_SerializationError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ExitCode: 1, ExitClass: enums.ExitClassTerminal} - appendStructuredError(status, enums.PhaseFailed, false, fmt.Errorf("failed to unmarshal config: invalid JSON")) - - errObj := requireStructuredStatusError(t, status) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeSerialization, errObj.Type) -} - -func TestAppendStructuredError_InitializationError(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ExitCode: 1, ExitClass: enums.ExitClassTerminal} - appendStructuredError(status, enums.PhaseFailed, false, fmt.Errorf("engram initialization failed: missing API key")) - - errObj := requireStructuredStatusError(t, status) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeInitialization, errObj.Type) -} - -func TestAppendStructuredError_RedactsAndTruncatesProviderMessageAfterMerge(t *testing.T) { - status := &runsv1alpha1.StepRunStatus{ - ExitCode: 1, - ExitClass: enums.ExitClassTerminal, - } - longSensitive := `password="s3cr3t" ` + strings.Repeat("x", maxErrorMessageBytes+64) - - appendStructuredError( - status, - enums.PhaseFailed, - false, - NewStructuredError(runsv1alpha1.StructuredErrorTypeExecution, longSensitive), - ) - - errObj := requireStructuredStatusError(t, status) - assert.LessOrEqual(t, len(errObj.Message), maxErrorMessageBytes) - assert.NotContains(t, errObj.Message, "s3cr3t") - assert.Contains(t, errObj.Message, "[REDACTED]") -} - -func TestWithStructuredErrorDetailsFallsBackWhenMarshalFails(t *testing.T) { - err := NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - "public failure", - WithStructuredErrorDetails(map[string]any{ - "bad": func() {}, - }), - ) - - provider, ok := err.(StructuredErrorProvider) - require.True(t, ok) - serr := provider.StructuredError() - require.NotNil(t, serr.Details) - - var payload map[string]any - require.NoError(t, json.Unmarshal(serr.Details.Raw, &payload)) - assert.Equal(t, true, payload["unserializable"]) - assert.Equal(t, "map", payload["type"]) - assert.Equal(t, "json: unsupported type: func()", payload["marshalError"]) - - details, ok := payload["details"].(map[string]any) - require.True(t, ok) - assert.Equal(t, float64(1), details["len"]) -} - -func TestStructuredErrorErrorPrefersStructuredMessageOverCause(t *testing.T) { - cause := errors.New("internal database password mismatch") - err := NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - "public failure", - WithStructuredErrorCause(cause), - ) - - assert.Equal(t, "public failure", err.Error()) - assert.ErrorIs(t, err, cause) -} - -func TestStructuredErrorErrorFallsBackToCauseWhenMessageEmpty(t *testing.T) { - cause := errors.New("internal fallback") - err := NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - "", - WithStructuredErrorCause(cause), - ) - - assert.Equal(t, "internal fallback", err.Error()) - assert.ErrorIs(t, err, cause) -} - -func TestClassifyError(t *testing.T) { - tests := []struct { - name string - err error - expected runsv1alpha1.StructuredErrorType - }{ - {"nil", nil, runsv1alpha1.StructuredErrorTypeUnknown}, - {"storage", fmt.Errorf("storage backend timeout"), runsv1alpha1.StructuredErrorTypeStorage}, - {"dehydrate", fmt.Errorf("failed to dehydrate output"), runsv1alpha1.StructuredErrorTypeStorage}, - {"unmarshal", fmt.Errorf("failed to unmarshal config"), runsv1alpha1.StructuredErrorTypeSerialization}, - {"marshal", fmt.Errorf("output marshal error"), runsv1alpha1.StructuredErrorTypeSerialization}, - {"schema", fmt.Errorf("input schema validation failed"), runsv1alpha1.StructuredErrorTypeValidation}, - {"validation", fmt.Errorf("validation error"), runsv1alpha1.StructuredErrorTypeValidation}, - {"init", fmt.Errorf("engram initialization failed"), runsv1alpha1.StructuredErrorTypeInitialization}, - {"generic", fmt.Errorf("something went wrong"), runsv1alpha1.StructuredErrorTypeExecution}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.expected, classifyError(tt.err)) - }) - } -} - -func TestNewStepRunStatus_SetsErrorOnFailure(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - StartedAt: metav1.Time{Time: time.Now().Add(-5 * time.Second)}, - } - status := newStepRunStatus(execCtx, enums.PhaseFailed, false, fmt.Errorf("test failure")) - - assert.Equal(t, enums.PhaseFailed, status.Phase) - errObj := requireStructuredStatusError(t, &status) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeExecution, errObj.Type) - assert.Equal(t, "test failure", errObj.Message) -} - -func TestNewStepRunStatus_NoErrorOnSuccess(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - StartedAt: metav1.Time{Time: time.Now().Add(-1 * time.Second)}, - } - status := newStepRunStatus(execCtx, enums.PhaseSucceeded, false, nil) - - assert.Equal(t, enums.PhaseSucceeded, status.Phase) - assert.Nil(t, status.Error, "status.error should be nil on success") -} - -func TestNewStepRunStatus_TimeoutSetsErrorType(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - StartedAt: metav1.Time{Time: time.Now().Add(-30 * time.Second)}, - } - status := newStepRunStatus(execCtx, enums.PhaseTimeout, true, fmt.Errorf("timeout exceeded")) - - errObj := requireStructuredStatusError(t, &status) - assert.Equal(t, runsv1alpha1.StructuredErrorTypeTimeout, errObj.Type) -} - -func TestNewStepRunStatus_SanitizesPersistedErrorFields(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - StartedAt: metav1.Time{Time: time.Now().Add(-3 * time.Second)}, - } - err := fmt.Errorf(`upstream rejected request: Authorization: Bearer top-secret password="super-secret"`) - - status := newStepRunStatus(execCtx, enums.PhaseFailed, false, err) - lastFailureStatus := runsv1alpha1.StepRunStatus{} - applyStatusOverride(&lastFailureStatus, nil, err, false, enums.PhaseFailed, err) - - assert.NotContains(t, lastFailureStatus.LastFailureMsg, "top-secret") - assert.NotContains(t, lastFailureStatus.LastFailureMsg, "super-secret") - assert.Contains(t, lastFailureStatus.LastFailureMsg, "[REDACTED]") - require.NotEmpty(t, status.Conditions) - assert.NotContains(t, status.Conditions[0].Message, "top-secret") - assert.NotContains(t, status.Conditions[0].Message, "super-secret") - assert.Contains(t, status.Conditions[0].Message, "[REDACTED]") - - errObj := requireStructuredStatusError(t, &status) - assert.NotContains(t, errObj.Message, "top-secret") - assert.NotContains(t, errObj.Message, "super-secret") - assert.Contains(t, errObj.Message, "[REDACTED]") -} - -func TestNewStepRunStatus_RetryableStructuredErrorSetsExitClass(t *testing.T) { - execCtx := &runtime.ExecutionContextData{ - StartedAt: metav1.Time{Time: time.Now().Add(-2 * time.Second)}, - } - err := NewStructuredError( - runsv1alpha1.StructuredErrorTypeExecution, - "transient failure", - WithStructuredErrorRetryable(true), - ) - - status := newStepRunStatus(execCtx, enums.PhaseFailed, false, err) - - assert.Equal(t, enums.ExitClassRetry, status.ExitClass) - errObj := requireStructuredStatusError(t, &status) - require.NotNil(t, errObj.Retryable) - assert.True(t, *errObj.Retryable) -} diff --git a/testkit/harness.go b/testkit/harness.go deleted file mode 100644 index 2e7fd41..0000000 --- a/testkit/harness.go +++ /dev/null @@ -1,156 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package testkit - -import ( - "context" - "errors" - "fmt" - "log/slog" - - "github.com/bubustack/bubu-sdk-go/engram" - "go.opentelemetry.io/otel/trace" -) - -// BatchHarness runs a BatchEngram with explicit config/inputs and a synthetic ExecutionContext. -// It is intended for unit tests that want to exercise Init + Process without a controller. -type BatchHarness[C any, I any] struct { - // Engram is the implementation under test. - Engram engram.BatchEngram[C, I] - // Config is passed to Engram.Init. - Config C - // Inputs is passed to Engram.Process. - Inputs I - // Secrets are expanded the same way as in SDK runtime before Init is called. - Secrets map[string]string - // StoryInfo seeds the synthetic execution context passed to Process. - StoryInfo engram.StoryInfo - // Logger overrides the default logger used in the synthetic execution context. - Logger *slog.Logger - // Tracer overrides the default no-op tracer used in the synthetic execution context. - Tracer trace.Tracer - // CELContext is injected into the synthetic execution context for template/CEL consumers. - CELContext map[string]any -} - -// Run executes Init + Process and returns the resulting output. -func (h BatchHarness[C, I]) Run(ctx context.Context) (*engram.Result, error) { - if ctx == nil { - return nil, errors.New("context is nil") - } - if h.Engram == nil { - return nil, errors.New("engram is nil") - } - logger := h.Logger - if logger == nil { - logger = slog.Default() - } - tracer := h.Tracer - if tracer == nil { - tracer = trace.NewNoopTracerProvider().Tracer("bubu-sdk-go/testkit") //nolint:staticcheck - } - secrets, err := engram.NewSecretsWithError(ctx, h.Secrets) - if err != nil { - return nil, fmt.Errorf("failed to expand secrets: %w", err) - } - if err := h.Engram.Init(ctx, h.Config, secrets); err != nil { - return nil, err - } - execCtx := engram.NewExecutionContextWithCELContext(logger, tracer, h.StoryInfo, h.CELContext) - return h.Engram.Process(ctx, execCtx, h.Inputs) -} - -// StreamHarness runs a StreamingEngram with provided input messages and returns collected outputs. -// It mimics the SDK runtime by closing the output channel after Stream returns. -type StreamHarness[C any] struct { - // Engram is the implementation under test. - Engram engram.StreamingEngram[C] - // Config is passed to Engram.Init. - Config C - // Secrets are expanded the same way as in SDK runtime before Init is called. - Secrets map[string]string - // StoryInfo seeds the synthetic execution context used by Stream. - StoryInfo engram.StoryInfo - // Logger overrides the default logger used in the synthetic execution context. - Logger *slog.Logger - // Tracer overrides the default no-op tracer used in the synthetic execution context. - Tracer trace.Tracer - // CELContext is injected into the synthetic execution context for template/CEL consumers. - CELContext map[string]any - // Inputs are converted to InboundMessages and delivered to Stream in order. - Inputs []engram.StreamMessage - // OnInputProcessed runs when an input message calls Done through the SDK - // receipt hook. Tests can use this to verify acknowledgement behavior. - OnInputProcessed func(engram.StreamMessage) -} - -// Run executes Init + Stream and returns the collected output messages. -func (h StreamHarness[C]) Run(ctx context.Context) ([]engram.StreamMessage, error) { - if ctx == nil { - return nil, errors.New("context is nil") - } - if h.Engram == nil { - return nil, errors.New("engram is nil") - } - logger := h.Logger - if logger == nil { - logger = slog.Default() //nolint:ineffassign,staticcheck - } - tracer := h.Tracer - if tracer == nil { - tracer = trace.NewNoopTracerProvider().Tracer("bubu-sdk-go/testkit") //nolint:ineffassign,staticcheck - } - secrets, err := engram.NewSecretsWithError(ctx, h.Secrets) - if err != nil { - return nil, fmt.Errorf("failed to expand secrets: %w", err) - } - if err := h.Engram.Init(ctx, h.Config, secrets); err != nil { - return nil, err - } - - in := make(chan engram.InboundMessage, len(h.Inputs)) - out := make(chan engram.StreamMessage, len(h.Inputs)) - for _, msg := range h.Inputs { - inbound := engram.NewInboundMessage(msg) - if h.OnInputProcessed != nil { - current := msg - inbound = engram.BindProcessingReceipt(inbound, func() { - h.OnInputProcessed(current) - }) - } - in <- inbound - } - close(in) - - errCh := make(chan error, 1) - go func() { - defer close(out) - defer func() { - if recovered := recover(); recovered != nil { - errCh <- fmt.Errorf("engram stream panicked: %v", recovered) - } - }() - errCh <- h.Engram.Stream(ctx, in, out) - }() - - outputs := make([]engram.StreamMessage, 0) - for msg := range out { - outputs = append(outputs, msg) - } - - return outputs, <-errCh -} diff --git a/testkit/harness_test.go b/testkit/harness_test.go deleted file mode 100644 index 306b9a2..0000000 --- a/testkit/harness_test.go +++ /dev/null @@ -1,142 +0,0 @@ -package testkit - -import ( - "context" - "path/filepath" - "sync/atomic" - "testing" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/stretchr/testify/require" -) - -type secretFailBatchEngram struct { - initCalled bool - processCalled bool -} - -func (e *secretFailBatchEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - e.initCalled = true - return nil -} - -func (e *secretFailBatchEngram) Process( - ctx context.Context, - execCtx *engram.ExecutionContext, - inputs struct{}, -) (*engram.Result, error) { - e.processCalled = true - return engram.NewResultFrom("ok"), nil -} - -type secretFailStreamEngram struct { - initCalled bool - streamCalled bool -} - -func (e *secretFailStreamEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - e.initCalled = true - return nil -} - -func (e *secretFailStreamEngram) Stream( - ctx context.Context, - in <-chan engram.InboundMessage, - out chan<- engram.StreamMessage, -) error { - e.streamCalled = true - return nil -} - -type panicStreamEngram struct{} - -func (e *panicStreamEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - return nil -} - -func (e *panicStreamEngram) Stream( - ctx context.Context, - in <-chan engram.InboundMessage, - out chan<- engram.StreamMessage, -) error { - panic("boom") -} - -func TestBatchHarnessRunFailsOnSecretExpansionError(t *testing.T) { - missingDir := filepath.Join(t.TempDir(), "missing") - eng := &secretFailBatchEngram{} - - _, err := (BatchHarness[struct{}, struct{}]{ - Engram: eng, - Secrets: map[string]string{"db": "file:" + missingDir}, - }).Run(context.Background()) - - require.Error(t, err) - require.Contains(t, err.Error(), "failed to expand secrets") - require.Contains(t, err.Error(), `secret "db" (file)`) - require.NotContains(t, err.Error(), missingDir) - require.False(t, eng.initCalled, "Init should not run when secret expansion fails") - require.False(t, eng.processCalled, "Process should not run when secret expansion fails") -} - -func TestStreamHarnessRunFailsOnSecretExpansionError(t *testing.T) { - missingDir := filepath.Join(t.TempDir(), "missing") - eng := &secretFailStreamEngram{} - - _, err := (StreamHarness[struct{}]{ - Engram: eng, - Secrets: map[string]string{"db": "file:" + missingDir}, - }).Run(context.Background()) - - require.Error(t, err) - require.Contains(t, err.Error(), "failed to expand secrets") - require.Contains(t, err.Error(), `secret "db" (file)`) - require.NotContains(t, err.Error(), missingDir) - require.False(t, eng.initCalled, "Init should not run when secret expansion fails") - require.False(t, eng.streamCalled, "Stream should not run when secret expansion fails") -} - -func TestStreamHarnessRunConvertsStreamPanicToError(t *testing.T) { - _, err := (StreamHarness[struct{}]{ - Engram: &panicStreamEngram{}, - }).Run(context.Background()) - - require.Error(t, err) - require.Contains(t, err.Error(), "engram stream panicked") - require.Contains(t, err.Error(), "boom") -} - -type ackingStreamEngram struct{} - -func (e *ackingStreamEngram) Init(ctx context.Context, config struct{}, secrets *engram.Secrets) error { - return nil -} - -func (e *ackingStreamEngram) Stream( - ctx context.Context, - in <-chan engram.InboundMessage, - out chan<- engram.StreamMessage, -) error { - for msg := range in { - msg.Done() - } - return nil -} - -func TestStreamHarnessRunNotifiesOnInputProcessed(t *testing.T) { - var processed atomic.Int32 - - _, err := (StreamHarness[struct{}]{ - Engram: &ackingStreamEngram{}, - Inputs: []engram.StreamMessage{ - {Payload: []byte(`{"a":1}`)}, - {Payload: []byte(`{"b":2}`)}, - }, - OnInputProcessed: func(engram.StreamMessage) { - processed.Add(1) - }, - }).Run(context.Background()) - - require.NoError(t, err) - require.Equal(t, int32(2), processed.Load()) -} diff --git a/testkit/validators.go b/testkit/validators.go deleted file mode 100644 index 09c40b8..0000000 --- a/testkit/validators.go +++ /dev/null @@ -1,124 +0,0 @@ -/* -Copyright 2025 BubuStack. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package testkit - -import ( - "encoding/json" - "fmt" - "strings" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bubu-sdk-go/engram" -) - -// ValidateStructuredError checks that a StructuredError payload matches the v1 contract. -func ValidateStructuredError(errObj runsv1alpha1.StructuredError) error { - if strings.TrimSpace(errObj.Version) == "" { - return fmt.Errorf("structured error version is required") - } - if errObj.Version != runsv1alpha1.StructuredErrorVersionV1 { - return fmt.Errorf("unsupported structured error version %q", errObj.Version) - } - if errObj.Type == "" { - return fmt.Errorf("structured error type is required") - } - switch errObj.Type { - case runsv1alpha1.StructuredErrorTypeTimeout, - runsv1alpha1.StructuredErrorTypeStorage, - runsv1alpha1.StructuredErrorTypeSerialization, - runsv1alpha1.StructuredErrorTypeValidation, - runsv1alpha1.StructuredErrorTypeInitialization, - runsv1alpha1.StructuredErrorTypeExecution, - runsv1alpha1.StructuredErrorTypeUnknown: - default: - return fmt.Errorf("unsupported structured error type %q", errObj.Type) - } - if strings.TrimSpace(errObj.Message) == "" { - return fmt.Errorf("structured error message is required") - } - if exitClass := strings.TrimSpace(string(errObj.ExitClass)); exitClass != "" { - switch runsv1alpha1.StructuredErrorExitClass(exitClass) { - case runsv1alpha1.StructuredErrorExitClassSuccess, - runsv1alpha1.StructuredErrorExitClassRetry, - runsv1alpha1.StructuredErrorExitClassTerminal, - runsv1alpha1.StructuredErrorExitClassRateLimited, - runsv1alpha1.StructuredErrorExitClassUnknown: - default: - return fmt.Errorf("unsupported structured error exitClass %q", exitClass) - } - } - return nil -} - -// RequireStructuredError fails the test when the StructuredError payload is invalid. -func RequireStructuredError(t testing.TB, errObj runsv1alpha1.StructuredError) { - t.Helper() - if err := ValidateStructuredError(errObj); err != nil { - t.Fatalf("invalid structured error: %v", err) - } -} - -// ValidateStreamMessage verifies the minimum validity requirements for streaming messages. -func ValidateStreamMessage(msg engram.StreamMessage) error { - if strings.TrimSpace(msg.Kind) == engram.StreamMessageKindError { - if err := ValidateStreamErrorMessage(msg); err != nil { - return err - } - } - if err := msg.Validate(); err != nil { - return err - } - if msg.Audio == nil && msg.Video == nil && msg.Binary == nil && len(msg.Payload) == 0 && len(msg.Inputs) == 0 && len(msg.Transports) == 0 { //nolint:lll - return fmt.Errorf("stream message missing payload, inputs, transports, or media") - } - return nil -} - -// ValidateStreamErrorMessage verifies the StructuredError envelope for error StreamMessages. -func ValidateStreamErrorMessage(msg engram.StreamMessage) error { - if strings.TrimSpace(msg.Kind) != engram.StreamMessageKindError { - return fmt.Errorf("stream message kind %q is not error", msg.Kind) - } - if len(msg.Payload) == 0 { - return fmt.Errorf("stream error message missing payload") - } - var errObj runsv1alpha1.StructuredError - if err := json.Unmarshal(msg.Payload, &errObj); err != nil { - return fmt.Errorf("stream error payload invalid: %w", err) - } - if err := ValidateStructuredError(errObj); err != nil { - return fmt.Errorf("stream error payload invalid: %w", err) - } - return nil -} - -// RequireStreamMessage fails the test when the message is invalid. -func RequireStreamMessage(t testing.TB, msg engram.StreamMessage) { - t.Helper() - if err := ValidateStreamMessage(msg); err != nil { - t.Fatalf("invalid stream message: %v", err) - } -} - -// RequireStreamErrorMessage fails the test when the error envelope is invalid. -func RequireStreamErrorMessage(t testing.TB, msg engram.StreamMessage) { - t.Helper() - if err := ValidateStreamErrorMessage(msg); err != nil { - t.Fatalf("invalid stream error message: %v", err) - } -} diff --git a/testkit/validators_test.go b/testkit/validators_test.go deleted file mode 100644 index e69cf32..0000000 --- a/testkit/validators_test.go +++ /dev/null @@ -1,112 +0,0 @@ -package testkit - -import ( - "encoding/json" - "strings" - "testing" - - runsv1alpha1 "github.com/bubustack/bobrapet/api/runs/v1alpha1" - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/tractatus/envelope" - "github.com/stretchr/testify/require" -) - -func TestValidateStructuredError(t *testing.T) { - good := runsv1alpha1.StructuredError{ - Version: runsv1alpha1.StructuredErrorVersionV1, - Type: runsv1alpha1.StructuredErrorTypeExecution, - Message: "boom", - } - require.NoError(t, ValidateStructuredError(good)) - - bad := good - bad.Version = "" - require.Error(t, ValidateStructuredError(bad)) - - bad = good - bad.ExitClass = runsv1alpha1.StructuredErrorExitClass("bogus") - require.Error(t, ValidateStructuredError(bad)) -} - -func TestValidateStreamMessage(t *testing.T) { - msg := engram.StreamMessage{Payload: []byte(`{"ok":true}`)} - require.NoError(t, ValidateStreamMessage(msg)) - - require.Error(t, ValidateStreamMessage(engram.StreamMessage{})) -} - -func TestValidateStreamMessage_InvalidMediaFrame(t *testing.T) { - msg := engram.StreamMessage{ - Audio: &engram.AudioFrame{ - SampleRateHz: 16000, - Channels: 1, - }, - } - - require.ErrorIs(t, ValidateStreamMessage(msg), engram.ErrInvalidStreamMessage) -} - -func TestValidateStreamMessage_InvalidMetadataKey(t *testing.T) { - msg := engram.StreamMessage{ - Payload: []byte(`{"ok":true}`), - Metadata: map[string]string{" bad ": "value"}, - } - - require.ErrorIs(t, ValidateStreamMessage(msg), engram.ErrInvalidStreamMessage) -} - -func TestValidateStreamMessage_RejectsReservedEnvelopeMimeWithoutEnvelopeFields(t *testing.T) { - msg := engram.StreamMessage{ - Binary: &engram.BinaryFrame{ - Payload: []byte("raw"), - MimeType: envelope.MIMEType, - }, - } - - require.ErrorIs(t, ValidateStreamMessage(msg), engram.ErrInvalidStreamMessage) -} - -func TestValidateStreamMessage_RejectsReservedEnvelopeMimeWithParametersWithoutEnvelopeFields(t *testing.T) { - msg := engram.StreamMessage{ - Binary: &engram.BinaryFrame{ - Payload: []byte("raw"), - MimeType: envelope.MIMEType + "; charset=utf-8", - }, - } - - require.ErrorIs(t, ValidateStreamMessage(msg), engram.ErrInvalidStreamMessage) -} - -func TestValidateStreamMessage_RejectsReservedEnvelopeMimeCaseInsensitivePayloadMismatch(t *testing.T) { - msg := engram.StreamMessage{ - Kind: "telemetry", - Payload: []byte(`{"ok":true}`), - Binary: &engram.BinaryFrame{ - Payload: []byte(`{"ok":false}`), - MimeType: strings.ToUpper(envelope.MIMEType), - }, - } - - require.ErrorIs(t, ValidateStreamMessage(msg), engram.ErrInvalidStreamMessage) -} - -func TestValidateStreamMessage_ErrorEnvelope(t *testing.T) { - payload, err := json.Marshal(runsv1alpha1.StructuredError{ - Version: runsv1alpha1.StructuredErrorVersionV1, - Type: runsv1alpha1.StructuredErrorTypeExecution, - Message: "boom", - }) - require.NoError(t, err) - - msg := engram.StreamMessage{ - Kind: engram.StreamMessageKindError, - Payload: payload, - } - require.NoError(t, ValidateStreamMessage(msg)) - - bad := engram.StreamMessage{ - Kind: engram.StreamMessageKindError, - Payload: []byte(`{"type":"execution"}`), - } - require.Error(t, ValidateStreamMessage(bad)) -} diff --git a/transport_binding.go b/transport_binding.go deleted file mode 100644 index 300721c..0000000 --- a/transport_binding.go +++ /dev/null @@ -1,77 +0,0 @@ -package sdk - -import ( - "errors" - "fmt" - "maps" - "os" - "strings" - - "github.com/bubustack/bubu-sdk-go/k8s" - "github.com/bubustack/core/contracts" - coretransport "github.com/bubustack/core/runtime/transport" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" -) - -var errBindingEnvMissing = errors.New("BUBU_TRANSPORT_BINDING not set") - -type bindingReference struct { - Name string - Namespace string - Raw string - Info *transportpb.BindingInfo -} - -func bindingReferenceFromEnv() (bindingReference, error) { - value := strings.TrimSpace(os.Getenv(contracts.TransportBindingEnv)) - if value == "" { - return bindingReference{}, errBindingEnvMissing - } - payload, err := coretransport.ParseBindingPayload(value) - if err != nil { - return bindingReference{}, err - } - ref := bindingReference{ - Name: payload.Reference.Name, - Namespace: payload.Reference.Namespace, - Info: payload.Info, - Raw: payload.Raw, - } - if err := validateBindingInfoProtocol(ref.Info); err != nil { - return bindingReference{}, err - } - if ref.Namespace == "" { - ref.Namespace = k8s.ResolvePodNamespace() - } - if ref.Raw == "" { - ref.Raw = value - } - return ref, nil -} - -func validateBindingInfoProtocol(info *transportpb.BindingInfo) error { - if info == nil { - return nil - } - if err := coretransport.ValidateProtocolVersion(info.GetProtocolVersion()); err != nil { - return fmt.Errorf("invalid transport protocol version in binding: %w", err) - } - return nil -} - -func (ref bindingReference) endpoint() string { - if ref.Info == nil { - return "" - } - return strings.TrimSpace(ref.Info.GetEndpoint()) -} - -func (ref bindingReference) envOverrides() map[string]string { - overrides := coretransport.BindingEnvOverrides(ref.Info) - if len(overrides) == 0 { - return nil - } - copyMap := make(map[string]string, len(overrides)) - maps.Copy(copyMap, overrides) - return copyMap -} diff --git a/transport_binding_test.go b/transport_binding_test.go deleted file mode 100644 index 3109e27..0000000 --- a/transport_binding_test.go +++ /dev/null @@ -1,80 +0,0 @@ -package sdk - -import ( - "encoding/json" - "testing" - - "github.com/bubustack/core/contracts" - coretransport "github.com/bubustack/core/runtime/transport" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "github.com/stretchr/testify/require" - "google.golang.org/protobuf/encoding/protojson" -) - -func TestBindingReferenceFromEnvEnvelope(t *testing.T) { - info := &transportpb.BindingInfo{ - Driver: "demo", - Endpoint: "connector:9000", - ProtocolVersion: coretransport.ProtocolVersion, - } - bindingJSON, err := protojson.Marshal(info) - require.NoError(t, err) - - envPayload, err := json.Marshal(map[string]any{ - "name": "binding-a", - "namespace": "demo", - "binding": json.RawMessage(bindingJSON), - }) - require.NoError(t, err) - - t.Setenv(contracts.TransportBindingEnv, string(envPayload)) - ref, err := bindingReferenceFromEnv() - require.NoError(t, err) - require.Equal(t, "binding-a", ref.Name) - require.Equal(t, "demo", ref.Namespace) - require.NotNil(t, ref.Info) - require.Equal(t, "demo", ref.Info.GetDriver()) - require.Equal(t, "connector:9000", ref.Info.GetEndpoint()) -} - -func TestBindingReferenceFromEnvRejectsBareBindingInfoPayload(t *testing.T) { - info := &transportpb.BindingInfo{ - Driver: "livekit", - Endpoint: "unix:///tmp/connector.sock", - ProtocolVersion: coretransport.ProtocolVersion, - } - payload, err := protojson.Marshal(info) - require.NoError(t, err) - - t.Setenv(contracts.TransportBindingEnv, string(payload)) - _, err = bindingReferenceFromEnv() - require.ErrorContains(t, err, "binding envelope name is required") -} - -func TestBindingReferenceFromEnvErrorsWithoutInlinePayload(t *testing.T) { - t.Setenv(contracts.TransportBindingEnv, "binding-legacy") - _, err := bindingReferenceFromEnv() - require.Error(t, err) -} - -func TestBindingReferenceFromEnvRejectsProtocolVersionMismatch(t *testing.T) { - info := &transportpb.BindingInfo{ - Driver: "demo", - Endpoint: "connector:9000", - ProtocolVersion: "9.9.9", - } - bindingJSON, err := protojson.Marshal(info) - require.NoError(t, err) - - envPayload, err := json.Marshal(map[string]any{ - "name": "binding-a", - "namespace": "demo", - "binding": json.RawMessage(bindingJSON), - }) - require.NoError(t, err) - - t.Setenv(contracts.TransportBindingEnv, string(envPayload)) - _, err = bindingReferenceFromEnv() - require.ErrorContains(t, err, "invalid transport protocol version in binding") - require.ErrorContains(t, err, "unsupported transport protocol version") -} diff --git a/transport_connector.go b/transport_connector.go deleted file mode 100644 index 41a6653..0000000 --- a/transport_connector.go +++ /dev/null @@ -1,206 +0,0 @@ -package sdk - -import ( - "context" - "crypto/tls" - "fmt" - "log/slog" - "net" - "strings" - "time" - - "github.com/bubustack/bubu-sdk-go/pkg/observability" - "github.com/bubustack/core/contracts" - transportconnector "github.com/bubustack/core/runtime/transport/connector" - transportpb "github.com/bubustack/tractatus/gen/go/proto/transport/v1" - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials" - "google.golang.org/grpc/credentials/insecure" -) - -const defaultDialTimeout = 10 * time.Second -const ( - defaultReconnectBaseBackoff = 500 * time.Millisecond - defaultReconnectMaxBackoff = 30 * time.Second - defaultReconnectMaxRetries = 10 -) - -// TransportConnectorClient wraps the generic Engram↔connector gRPC contract. -// It dials the connector endpoint advertised via the TransportBinding env payload. -type TransportConnectorClient struct { - conn *grpc.ClientConn - client transportpb.TransportConnectorServiceClient -} - -// DialTransportConnector establishes a gRPC client connection to the generic transport connector. -// Endpoint may be tcp host:port or a unix domain socket (prefixed with unix://). -func DialTransportConnector( - ctx context.Context, - endpoint string, - opts ...grpc.DialOption, -) (*TransportConnectorClient, error) { - return dialTransportConnector(ctx, endpoint, newEnvResolver(nil), opts...) -} - -func dialTransportConnector( - ctx context.Context, - endpoint string, - env envResolver, - opts ...grpc.DialOption, -) (*TransportConnectorClient, error) { - if strings.TrimSpace(endpoint) == "" { - return nil, fmt.Errorf("transport connector endpoint empty") - } - - baseDialOpts, err := defaultTransportDialOptions(endpoint, env) - if err != nil { - return nil, err - } - dialOpts := append(baseDialOpts, opts...) - if isUnixEndpoint(endpoint) { - dialOpts = append(dialOpts, grpc.WithContextDialer(unixDialer())) - endpoint = strings.TrimPrefix(endpoint, "unix://") - } - - conn, err := grpc.NewClient(endpoint, dialOpts...) - if err != nil { - return nil, err - } - - waitCtx, cancel := contextWithDialTimeout(ctx, env) - defer cancel() - if err := transportconnector.WaitForReady(waitCtx, conn); err != nil { - _ = conn.Close() - return nil, err - } - - if isDebugEnabled() { - LoggerFromContext(ctx).Debug("Connector dial complete", - slog.String("endpoint", endpoint), - slog.Bool("tls", !usesPlaintextLocalConnector(endpoint)), - ) - } - - return &TransportConnectorClient{ - conn: conn, - client: transportpb.NewTransportConnectorServiceClient(conn), - }, nil -} - -// Client exposes the underlying generated gRPC client. -func (c *TransportConnectorClient) Client() transportpb.TransportConnectorServiceClient { - if c == nil { - return nil - } - return c.client -} - -// Close tears down the connector connection. -func (c *TransportConnectorClient) Close() error { - if c == nil || c.conn == nil { - return nil - } - return c.conn.Close() -} - -func defaultTransportDialOptions(endpoint string, env envResolver) ([]grpc.DialOption, error) { - if err := validateTransportSecurityMode(env); err != nil { - return nil, err - } - callOpts := transportconnector.ClientCallOptions( - env, - transportconnector.DefaultMaxMessageSize, - transportconnector.DefaultMaxMessageSize, - ) - dialTimeout := transportconnector.DialTimeout(env, defaultDialTimeout) - connectParams := grpc.ConnectParams{} - if dialTimeout > 0 { - connectParams.MinConnectTimeout = dialTimeout - } - - creds := defaultTransportCredentials(endpoint) - - opts := []grpc.DialOption{ - grpc.WithTransportCredentials(creds), - grpc.WithConnectParams(connectParams), - } - if len(callOpts) > 0 { - opts = append(opts, grpc.WithDefaultCallOptions(callOpts...)) - } - if observability.TracePropagationEnabled() { - opts = append(opts, grpc.WithStatsHandler(otelgrpc.NewClientHandler())) - } - return opts, nil -} - -func defaultTransportCredentials(endpoint string) credentials.TransportCredentials { - if usesPlaintextLocalConnector(endpoint) { - return insecure.NewCredentials() - } - return credentials.NewTLS(&tls.Config{MinVersion: tls.VersionTLS13}) -} - -func unixDialer() func(context.Context, string) (net.Conn, error) { - return func(ctx context.Context, addr string) (net.Conn, error) { - d := &net.Dialer{} - return d.DialContext(ctx, "unix", addr) - } -} - -func isUnixEndpoint(endpoint string) bool { - return strings.HasPrefix(endpoint, "unix://") -} - -func usesPlaintextLocalConnector(endpoint string) bool { - endpoint = strings.TrimSpace(endpoint) - if endpoint == "" { - return false - } - if isUnixEndpoint(endpoint) { - return true - } - host := endpoint - if strings.Contains(endpoint, ":") { - parsedHost, _, err := net.SplitHostPort(endpoint) - if err != nil { - return false - } - host = parsedHost - } - host = strings.Trim(host, "[]") - if strings.EqualFold(host, "localhost") { - return true - } - ip := net.ParseIP(host) - return ip != nil && ip.IsLoopback() -} - -var connectorDial = func( - ctx context.Context, - endpoint string, - env envResolver, - opts ...grpc.DialOption, -) (*TransportConnectorClient, error) { - return dialTransportConnector(ctx, endpoint, env, opts...) -} - -func contextWithDialTimeout(ctx context.Context, env envResolver) (context.Context, context.CancelFunc) { - if _, ok := ctx.Deadline(); ok { - return ctx, func() {} - } - timeout := transportconnector.DialTimeout(env, defaultDialTimeout) - if timeout <= 0 { - return context.WithCancel(ctx) - } - return context.WithTimeout(ctx, timeout) -} - -func validateTransportSecurityMode(env envResolver) error { - mode := strings.ToLower(strings.TrimSpace(env.lookup(contracts.TransportSecurityModeEnv))) - if mode == "" || mode == contracts.TransportSecurityModeTLS { - return nil - } - return fmt.Errorf("invalid %s %q: only %q is supported", contracts.TransportSecurityModeEnv, mode, - contracts.TransportSecurityModeTLS) -} diff --git a/transport_connector_test.go b/transport_connector_test.go deleted file mode 100644 index a930759..0000000 --- a/transport_connector_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package sdk - -import ( - "context" - "net" - "testing" - "time" - - "github.com/bubustack/core/contracts" - transportconnector "github.com/bubustack/core/runtime/transport/connector" - "google.golang.org/grpc" -) - -func TestValidateTransportSecurityMode(t *testing.T) { - env := newEnvResolver(map[string]string{contracts.TransportSecurityModeEnv: "plaintext"}) - if err := validateTransportSecurityMode(env); err == nil { - t.Fatalf("expected plaintext security mode to be rejected") - } - env = newEnvResolver(map[string]string{contracts.TransportSecurityModeEnv: contracts.TransportSecurityModeTLS}) - if err := validateTransportSecurityMode(env); err != nil { - t.Fatalf("expected tls security mode to be accepted, got %v", err) - } - env = newEnvResolver(nil) - if err := validateTransportSecurityMode(env); err != nil { - t.Fatalf("expected default security mode to be accepted, got %v", err) - } -} - -func TestIsUnixEndpoint(t *testing.T) { - if !isUnixEndpoint("unix:///tmp/connector.sock") { - t.Fatalf("expected unix endpoint to be detected") - } - if isUnixEndpoint("tcp://127.0.0.1:9000") { - t.Fatalf("expected tcp endpoint not to be treated as unix") - } -} - -func TestResolveDialTimeout(t *testing.T) { - env := newEnvResolver(map[string]string{"BUBU_GRPC_DIAL_TIMEOUT": "3s"}) - if timeout := transportconnector.DialTimeout(env, defaultDialTimeout); timeout != 3*time.Second { - t.Fatalf("expected dial timeout 3s, got %s", timeout) - } - - env = newEnvResolver(map[string]string{"BUBU_GRPC_DIAL_TIMEOUT": "invalid"}) - if timeout := transportconnector.DialTimeout(env, defaultDialTimeout); timeout != defaultDialTimeout { - t.Fatalf("expected fallback dial timeout %s, got %s", defaultDialTimeout, timeout) - } -} - -func TestDialTransportConnectorUsesPlaintextForLoopbackEndpoints(t *testing.T) { - t.Parallel() - - listener, err := net.Listen("tcp", "127.0.0.1:0") - if err != nil { - t.Fatalf("listen: %v", err) - } - server := grpc.NewServer() - defer server.Stop() - go func() { - _ = server.Serve(listener) - }() - - env := newEnvResolver(map[string]string{ - contracts.TransportSecurityModeEnv: contracts.TransportSecurityModeTLS, - contracts.GRPCDialTimeoutEnv: "1s", - }) - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - conn, err := dialTransportConnector(ctx, listener.Addr().String(), env) - if err != nil { - t.Fatalf("dialTransportConnector: %v", err) - } - defer func() { - _ = conn.Close() - }() -} diff --git a/transport_envelope.go b/transport_envelope.go deleted file mode 100644 index ddd8eb5..0000000 --- a/transport_envelope.go +++ /dev/null @@ -1,226 +0,0 @@ -package sdk - -import ( - "encoding/json" - "maps" - "reflect" - "strings" - "time" - - "github.com/bubustack/bubu-sdk-go/engram" - "github.com/bubustack/tractatus/envelope" -) - -const defaultEnvelopePayloadMIME = "application/json" - -func streamMessageEnvelope(msg engram.StreamMessage) *envelope.Envelope { - env := &envelope.Envelope{Version: envelope.LatestVersion} - var populated bool - - if copyEnvelopeHeaders(env, msg) { - populated = true - } - if copyEnvelopeMetadataField(env, msg.Metadata) { - populated = true - } - if copyEnvelopePayload(env, msg.Payload) { - populated = true - } - if copyEnvelopeInputs(env, msg.Inputs) { - populated = true - } - if copyEnvelopeTransports(env, msg.Transports) { - populated = true - } - - if !populated { - return nil - } - return env -} - -func copyEnvelopeHeaders(env *envelope.Envelope, msg engram.StreamMessage) bool { - var updated bool - if msg.Kind != "" { - env.Kind = msg.Kind - updated = true - } - if msg.MessageID != "" { - env.MessageID = msg.MessageID - updated = true - } - if !msg.Timestamp.IsZero() { - env.TimestampMs = msg.Timestamp.UTC().UnixMilli() - updated = true - } - return updated -} - -func copyEnvelopeMetadataField(env *envelope.Envelope, metadata map[string]string) bool { - if len(metadata) == 0 { - return false - } - env.Metadata = cloneStringMap(metadata) - return true -} - -func copyEnvelopePayload(env *envelope.Envelope, payload []byte) bool { - if len(payload) == 0 { - return false - } - env.Payload = json.RawMessage(copyBytes(payload)) - return true -} - -func copyEnvelopeInputs(env *envelope.Envelope, inputs []byte) bool { - if len(inputs) == 0 { - return false - } - env.Inputs = json.RawMessage(copyBytes(inputs)) - return true -} - -func copyEnvelopeTransports(env *envelope.Envelope, transports []engram.TransportDescriptor) bool { - if len(transports) == 0 { - return false - } - env.Transports = make([]envelope.TransportDescriptor, len(transports)) - for i := range transports { - src := transports[i] - env.Transports[i] = envelope.TransportDescriptor{ - Name: src.Name, - Kind: src.Kind, - Mode: src.Mode, - Config: cloneConfigMap(src.Config), - } - } - return true -} - -func populateMessageFromEnvelope(msg *engram.StreamMessage, env *envelope.Envelope) { - if env == nil || msg == nil { - return - } - msg.Kind = strings.TrimSpace(env.Kind) - msg.MessageID = strings.TrimSpace(env.MessageID) - if env.TimestampMs > 0 { - msg.Timestamp = time.UnixMilli(env.TimestampMs).UTC() - } - if len(env.Metadata) > 0 { - msg.Metadata = cloneStringMap(env.Metadata) - } - if len(env.Payload) > 0 { - payloadCopy := copyBytes(env.Payload) - msg.Payload = payloadCopy - msg.Binary = &engram.BinaryFrame{ - // Keep payload and binary payload mirrored without a second copy on - // the structured-envelope decode path. - Payload: payloadCopy, - MimeType: defaultEnvelopePayloadMIME, - } - if env.TimestampMs > 0 { - msg.Binary.Timestamp = time.Duration(env.TimestampMs) * time.Millisecond - } - } - if len(env.Inputs) > 0 { - msg.Inputs = copyBytes(env.Inputs) - } - if len(env.Transports) > 0 { - msg.Transports = make([]engram.TransportDescriptor, len(env.Transports)) - for i := range env.Transports { - src := env.Transports[i] - msg.Transports[i] = engram.TransportDescriptor{ - Name: src.Name, - Kind: src.Kind, - Mode: src.Mode, - Config: cloneConfigMap(src.Config), - } - } - } -} - -func cloneStringMap(src map[string]string) map[string]string { - if len(src) == 0 { - return nil - } - dst := make(map[string]string, len(src)) - maps.Copy(dst, src) - return dst -} - -func cloneConfigMap(src map[string]any) map[string]any { - if src == nil { - return nil - } - dst := make(map[string]any, len(src)) - for k, v := range src { - dst[k] = cloneConfigValue(v) - } - return dst -} - -func cloneConfigValue(v any) any { - if v == nil { - return nil - } - return cloneConfigReflectValue(reflect.ValueOf(v)).Interface() -} - -func cloneConfigReflectValue(value reflect.Value) reflect.Value { - if !value.IsValid() { - return value - } - switch value.Kind() { - case reflect.Interface: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - cloned := cloneConfigReflectValue(value.Elem()) - out := reflect.New(value.Type()).Elem() - out.Set(cloned) - return out - case reflect.Pointer: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - out := reflect.New(value.Type().Elem()) - out.Elem().Set(cloneConfigReflectValue(value.Elem())) - return out - case reflect.Map: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - out := reflect.MakeMapWithSize(value.Type(), value.Len()) - iter := value.MapRange() - for iter.Next() { - out.SetMapIndex(iter.Key(), cloneConfigReflectValue(iter.Value())) - } - return out - case reflect.Slice: - if value.IsNil() { - return reflect.Zero(value.Type()) - } - out := reflect.MakeSlice(value.Type(), value.Len(), value.Len()) - for i := 0; i < value.Len(); i++ { - out.Index(i).Set(cloneConfigReflectValue(value.Index(i))) - } - return out - case reflect.Array: - out := reflect.New(value.Type()).Elem() - for i := 0; i < value.Len(); i++ { - out.Index(i).Set(cloneConfigReflectValue(value.Index(i))) - } - return out - default: - return value - } -} - -func copyBytes(src []byte) []byte { - if len(src) == 0 { - return nil - } - dst := make([]byte, len(src)) - copy(dst, src) - return dst -} diff --git a/transport_helpers.go b/transport_helpers.go deleted file mode 100644 index 49c6fbf..0000000 --- a/transport_helpers.go +++ /dev/null @@ -1,53 +0,0 @@ -package sdk - -import ( - "encoding/json" - "strings" - - "github.com/bubustack/bubu-sdk-go/engram" -) - -func cloneTransportDescriptors(src []engram.TransportDescriptor) []engram.TransportDescriptor { - if len(src) == 0 { - return nil - } - out := make([]engram.TransportDescriptor, len(src)) - for i := range src { - out[i] = src[i].Clone() - } - return out -} - -func storyMetadata(info engram.StoryInfo) map[string]string { - meta := make(map[string]string, 5) - if v := strings.TrimSpace(info.StoryName); v != "" { - meta["storyName"] = v - } - if v := strings.TrimSpace(info.StoryRunID); v != "" { - meta["storyRunID"] = v - } - if v := strings.TrimSpace(info.StepName); v != "" { - meta["stepName"] = v - } - if v := strings.TrimSpace(info.StepRunID); v != "" { - meta["stepRunID"] = v - } - if v := strings.TrimSpace(info.StepRunNamespace); v != "" { - meta["stepRunNamespace"] = v - } - if len(meta) == 0 { - return nil - } - return meta -} - -func inputsJSON(inputs map[string]any) ([]byte, error) { - if len(inputs) == 0 { - return nil, nil - } - payload, err := json.Marshal(inputs) - if err != nil { - return nil, err - } - return payload, nil -}