From 3568d114747f24bae3ddded8d321cd7f7da8787e Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Sat, 25 Oct 2025 17:15:07 -0500 Subject: [PATCH 1/4] chore(feature/go): Init (#51) * chore(feature/go): Init Signed-off-by: Julio Jimenez * go Signed-off-by: Julio Jimenez * Dockerfile Signed-off-by: Julio Jimenez * .golangci Signed-off-by: Julio Jimenez * validation not defined Signed-off-by: Julio Jimenez * regexp and strings not used Signed-off-by: Julio Jimenez * io undefined Signed-off-by: Julio Jimenez * fix: pre-commit Signed-off-by: Julio Jimenez * fix: pre-commit Signed-off-by: Julio Jimenez * fix: pre-commit Signed-off-by: Julio Jimenez * fix: pre-commit Signed-off-by: Julio Jimenez * fix: pre-commit Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: integration test Signed-off-by: Julio Jimenez * fix: lint Signed-off-by: Julio Jimenez * fix: docker build Signed-off-by: Julio Jimenez * test: e2e Signed-off-by: Julio Jimenez * test: fix benchmark Signed-off-by: Julio Jimenez * test: fix benchmark Signed-off-by: Julio Jimenez * test: fix security check Signed-off-by: Julio Jimenez --------- Signed-off-by: Julio Jimenez --- .github/dependabot.yml | 4 + .github/workflows/docker-security.yml | 26 +- .github/workflows/tests.yml | 297 ++- .gitignore | 2 +- .golangci.yml | 27 + .pre-commit-config.yaml | 99 + Dockerfile | 115 +- README.md | 16 +- cmd/clickbom/main.go | 176 ++ entrypoint.sh | 578 ---- go.mod | 27 + go.sum | 36 + internal/config/config.go | 315 +++ internal/config/config_test.go | 128 + internal/sbom/filter.go | 94 + internal/sbom/github.go | 169 ++ internal/sbom/mend.go | 382 +++ internal/sbom/processing.go | 137 + internal/sbom/processing_integration_test.go | 117 + internal/sbom/processing_test.go | 32 + internal/sbom/wiz.go | 219 ++ internal/storage/clickhouse.go | 325 +++ .../storage/clickhouse_integration_test.go | 104 + internal/storage/e2e_test.go | 141 + internal/storage/s3.go | 136 + internal/storage/s3_integration_test.go | 111 + internal/storage/s3_test.go | 66 + internal/validation/sanitize.go | 205 ++ internal/validation/sanitize_test.go | 158 ++ lib/common.sh | 41 - lib/github.sh | 136 - lib/mend.sh | 392 --- lib/sanitize.sh | 464 ---- lib/sbom-merging.sh | 533 ---- lib/sbom-processing.sh | 152 -- lib/validation.sh | 71 - lib/wiz.sh | 326 --- pkg/logger/logger.go | 64 + run-tests.sh | 311 --- setup-bats.sh | 112 - test/advanced.bats | 2326 ----------------- test/simple.bats | 832 ------ 42 files changed, 3638 insertions(+), 6364 deletions(-) create mode 100644 .golangci.yml create mode 100644 .pre-commit-config.yaml create mode 100644 cmd/clickbom/main.go delete mode 100755 entrypoint.sh create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/config/config.go create mode 100644 internal/config/config_test.go create mode 100644 internal/sbom/filter.go create mode 100644 internal/sbom/github.go create mode 100644 internal/sbom/mend.go create mode 100644 internal/sbom/processing.go create mode 100644 internal/sbom/processing_integration_test.go create mode 100644 internal/sbom/processing_test.go create mode 100644 internal/sbom/wiz.go create mode 100644 internal/storage/clickhouse.go create mode 100644 
internal/storage/clickhouse_integration_test.go create mode 100644 internal/storage/e2e_test.go create mode 100644 internal/storage/s3.go create mode 100644 internal/storage/s3_integration_test.go create mode 100644 internal/storage/s3_test.go create mode 100644 internal/validation/sanitize.go create mode 100644 internal/validation/sanitize_test.go delete mode 100644 lib/common.sh delete mode 100644 lib/github.sh delete mode 100644 lib/mend.sh delete mode 100644 lib/sanitize.sh delete mode 100644 lib/sbom-merging.sh delete mode 100644 lib/sbom-processing.sh delete mode 100644 lib/validation.sh delete mode 100644 lib/wiz.sh create mode 100644 pkg/logger/logger.go delete mode 100755 run-tests.sh delete mode 100755 setup-bats.sh delete mode 100644 test/advanced.bats delete mode 100644 test/simple.bats diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 672fb5e..a9545e4 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,3 +8,7 @@ updates: directory: "/" schedule: interval: "weekly" + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/docker-security.yml b/.github/workflows/docker-security.yml index d197404..643ef1d 100644 --- a/.github/workflows/docker-security.yml +++ b/.github/workflows/docker-security.yml @@ -11,7 +11,7 @@ jobs: docker_security_scan: name: ๐Ÿ” Container Security Scan runs-on: ubuntu-latest - + permissions: contents: read security-events: write @@ -53,7 +53,7 @@ jobs: echo "# ๐Ÿณ Container Security Report" > security-report.md echo "Generated on: $(date)" >> security-report.md echo "" >> security-report.md - + # Trivy Results Summary echo "## ๐Ÿ›ก๏ธ Trivy Scan Results" >> security-report.md if [ -f "trivy-results.json" ]; then @@ -61,7 +61,7 @@ jobs: HIGH=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "HIGH")] | length' trivy-results.json 2>/dev/null || echo "0") MEDIUM=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "MEDIUM")] | length' trivy-results.json 2>/dev/null || echo "0") LOW=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "LOW")] | length' trivy-results.json 2>/dev/null || echo "0") - + echo "- ๐Ÿ”ด Critical: $CRITICAL" >> security-report.md echo "- ๐ŸŸ  High: $HIGH" >> security-report.md echo "- ๐ŸŸก Medium: $MEDIUM" >> security-report.md @@ -69,7 +69,7 @@ jobs: else echo "- No Trivy results found" >> security-report.md fi - + echo "" >> security-report.md echo "## ๐Ÿ“‹ Recommendations" >> security-report.md echo "1. Review critical and high severity vulnerabilities" >> security-report.md @@ -93,7 +93,7 @@ jobs: if [ -f "trivy-results.json" ]; then CRITICAL=$(jq '[.Results[]?.Vulnerabilities[]? 
| select(.Severity == "CRITICAL")] | length' trivy-results.json 2>/dev/null || echo "0") echo "Critical vulnerabilities found: $CRITICAL" - + if [ "$CRITICAL" -gt 0 ]; then echo "::error::Found $CRITICAL critical vulnerabilities in the container image" echo "::error::Please review and fix critical vulnerabilities before deploying" @@ -110,7 +110,7 @@ jobs: name: ๐Ÿ‹ Dockerfile Security Scan runs-on: ubuntu-latest - + steps: - name: ๐Ÿงพ Checkout uses: actions/checkout@v5 @@ -164,7 +164,7 @@ jobs: name: ๐Ÿ“‹ Generate Container SBOM runs-on: ubuntu-latest needs: docker_security_scan - + steps: - name: ๐Ÿงพ Checkout uses: actions/checkout@v5 @@ -184,7 +184,7 @@ jobs: run: | # Install Docker Scout CLI curl -sSfL https://raw.githubusercontent.com/docker/scout-cli/main/install.sh | sh -s -- - + # Generate SBOM docker scout sbom clickbom:latest --format spdx --output container-sbom-scout.spdx.json || echo "Docker Scout SBOM generation failed" @@ -202,7 +202,7 @@ jobs: runs-on: ubuntu-latest needs: [docker_security_scan, dockerfile_security_scan, container_sbom] if: always() - + steps: - name: ๐Ÿ“ฅ Download Security Artifacts uses: actions/download-artifact@v5 @@ -221,14 +221,14 @@ jobs: echo "# ๐Ÿ”’ ClickBOM Container Security Summary" >> $GITHUB_STEP_SUMMARY echo "**Scan Date:** $(date)" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - + if [ -f "security-results/trivy-results.json" ]; then echo "## ๐Ÿ›ก๏ธ Vulnerability Scan Results" >> $GITHUB_STEP_SUMMARY CRITICAL=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "CRITICAL")] | length' security-results/trivy-results.json 2>/dev/null || echo "0") HIGH=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "HIGH")] | length' security-results/trivy-results.json 2>/dev/null || echo "0") MEDIUM=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "MEDIUM")] | length' security-results/trivy-results.json 2>/dev/null || echo "0") LOW=$(jq '[.Results[]?.Vulnerabilities[]? | select(.Severity == "LOW")] | length' security-results/trivy-results.json 2>/dev/null || echo "0") - + echo "| Severity | Count |" >> $GITHUB_STEP_SUMMARY echo "|----------|-------|" >> $GITHUB_STEP_SUMMARY echo "| ๐Ÿ”ด Critical | $CRITICAL |" >> $GITHUB_STEP_SUMMARY @@ -236,14 +236,14 @@ jobs: echo "| ๐ŸŸก Medium | $MEDIUM |" >> $GITHUB_STEP_SUMMARY echo "| ๐ŸŸข Low | $LOW |" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - + if [ "$CRITICAL" -gt 0 ] || [ "$HIGH" -gt 0 ]; then echo "โš ๏ธ **Action Required:** Critical or High severity vulnerabilities found!" >> $GITHUB_STEP_SUMMARY else echo "โœ… **Good News:** No critical or high severity vulnerabilities found!" 
>> $GITHUB_STEP_SUMMARY fi fi - + echo "" >> $GITHUB_STEP_SUMMARY echo "## ๐Ÿ“‹ Artifacts Generated" >> $GITHUB_STEP_SUMMARY echo "- Container vulnerability scan results (SARIF format)" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8562cd4..3a20194 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,23 +1,298 @@ name: ๐Ÿ’ฃ ClickBOM Tests on: [push] - +permissions: + security-events: write + contents: write jobs: - test_clickbom_github: - name: ๐Ÿ’ฃ ClickBOM Tests + # Unit tests + test_unit: + name: ๐Ÿงช Unit Tests + runs-on: ubuntu-latest + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true + + - name: ๐Ÿ“ฆ Download dependencies + run: go mod download + + - name: ๐Ÿงช Run unit tests + run: go test -v -race -coverprofile=coverage.out -covermode=atomic ./... + + - name: ๐Ÿ“Š Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: ./coverage.out + flags: unittests + name: codecov-umbrella + token: ${{ secrets.CODECOV_TOKEN }} + + # Integration tests + test_integration: + name: ๐Ÿ”— Integration Tests + runs-on: ubuntu-latest + + services: + # Mock S3 using LocalStack + localstack: + image: localstack/localstack:latest + env: + SERVICES: s3 + DEFAULT_REGION: us-east-1 + ports: + - 4566:4566 + options: >- + --health-cmd "awslocal s3 ls" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + # Mock ClickHouse + clickhouse: + image: clickhouse/clickhouse-server:latest + ports: + - 8123:8123 + options: >- + --health-cmd "wget --spider -q localhost:8123/ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true + + - name: ๐Ÿ“ฆ Install CycloneDX CLI + run: | + wget -O /tmp/cyclonedx "https://github.com/CycloneDX/cyclonedx-cli/releases/download/v0.27.2/cyclonedx-linux-x64" + sudo mv /tmp/cyclonedx /usr/local/bin/cyclonedx + sudo chmod +x /usr/local/bin/cyclonedx + + - name: โš™๏ธ Setup LocalStack S3 + run: | + aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket + env: + AWS_ACCESS_KEY_ID: test + AWS_SECRET_ACCESS_KEY: test + AWS_DEFAULT_REGION: us-east-1 + + # - name: ๐Ÿงช Run integration tests + # run: go test -v -tags=integration ./test/integration/... + # env: + # AWS_ENDPOINT_URL: http://localhost:4566 + # CLICKHOUSE_URL: http://localhost:8123 + # AWS_ACCESS_KEY_ID: test + # AWS_SECRET_ACCESS_KEY: test + # AWS_DEFAULT_REGION: us-east-1 + + # Lint and format checks + test_lint: + name: ๐Ÿ” Lint & Format + runs-on: ubuntu-latest + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true + + - name: ๐Ÿ” Run golangci-lint + uses: golangci/golangci-lint-action@v8 + with: + version: latest + args: --timeout=5m + + - name: ๐Ÿ“ Check formatting + run: | + if [ "$(gofmt -s -l . | wc -l)" -gt 0 ]; then + echo "Code is not formatted. Run 'gofmt -s -w .'" + gofmt -s -l . + exit 1 + fi + + - name: ๐Ÿ”’ Run gosec security scanner + uses: securego/gosec@master + with: + args: '-severity high -confidence high -no-fail -fmt sarif -out results.sarif ./...' 
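Note on the integration-test job earlier in this workflow: it provisions LocalStack (S3 on port 4566) and a ClickHouse server (port 8123), but the actual `go test -tags=integration` step is still commented out. A minimal sketch of how such a test could point the aws-sdk-go-v2 client from go.mod at the LocalStack endpoint is shown below; the package name, test name, bucket, and `AWS_ENDPOINT_URL` handling are illustrative assumptions, not the repository's actual internal/storage test code.

package storage

import (
	"context"
	"os"
	"strings"
	"testing"

	"github.com/aws/aws-sdk-go-v2/aws"
	awsconfig "github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

// TestS3UploadAgainstLocalStack is a hypothetical integration test: it builds an
// S3 client that talks to the LocalStack service defined in the workflow above.
func TestS3UploadAgainstLocalStack(t *testing.T) {
	endpoint := os.Getenv("AWS_ENDPOINT_URL") // e.g. http://localhost:4566
	if endpoint == "" {
		t.Skip("AWS_ENDPOINT_URL not set; LocalStack not available")
	}

	ctx := context.Background()
	cfg, err := awsconfig.LoadDefaultConfig(ctx,
		awsconfig.WithRegion("us-east-1"),
		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider("test", "test", "")),
	)
	if err != nil {
		t.Fatalf("load AWS config: %v", err)
	}

	client := s3.NewFromConfig(cfg, func(o *s3.Options) {
		o.BaseEndpoint = aws.String(endpoint) // route requests to LocalStack instead of AWS
		o.UsePathStyle = true                 // LocalStack expects path-style addressing
	})

	_, err = client.PutObject(ctx, &s3.PutObjectInput{
		Bucket: aws.String("test-bucket"), // created by the "Setup LocalStack S3" step
		Key:    aws.String("it-sbom.json"),
		Body:   strings.NewReader(`{"bomFormat":"CycloneDX"}`),
	})
	if err != nil {
		t.Fatalf("PutObject against LocalStack failed: %v", err)
	}
}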
+ + - name: ๐Ÿ“ค Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif + + # Build tests + test_build: + name: ๐Ÿ—๏ธ Build Tests + runs-on: ubuntu-latest + strategy: + matrix: + goos: [linux, darwin, windows] + goarch: [amd64, arm64] + exclude: + - goos: windows + goarch: arm64 + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + cache: true + + - name: ๐Ÿ—๏ธ Build for ${{ matrix.goos }}/${{ matrix.goarch }} + run: | + GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} \ + go build -v -o clickbom-${{ matrix.goos }}-${{ matrix.goarch }} \ + ./cmd/clickbom + + - name: ๐Ÿ“ค Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: clickbom-${{ matrix.goos }}-${{ matrix.goarch }} + path: clickbom-${{ matrix.goos }}-${{ matrix.goarch }}* + + # Docker build test + test_docker: + name: ๐Ÿณ Docker Build + runs-on: ubuntu-latest + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: ๐Ÿ—๏ธ Build Docker image + uses: docker/build-push-action@v5 + with: + context: . + push: false + tags: clickbom:test + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: | + VERSION=test + BUILD_DATE=${{ github.event.head_commit.timestamp }} + VCS_REF=${{ github.sha }} + + - name: ๐Ÿงช Test Docker image + run: | + docker run --rm clickbom:test --version || true + docker run --rm clickbom:test --help || true + + # End-to-end tests with real GitHub API + test_e2e: + name: ๐ŸŽฏ E2E Tests + runs-on: ubuntu-latest + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/feature/*') + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + cache: true + + - name: ๐Ÿ—๏ธ Build + run: go build -v -o clickbom ./cmd/clickbom + + - name: ๐Ÿ“ฆ Install dependencies + run: | + wget -O /tmp/cyclonedx "https://github.com/CycloneDX/cyclonedx-cli/releases/download/v0.27.2/cyclonedx-linux-x64" + sudo mv /tmp/cyclonedx /usr/local/bin/cyclonedx + sudo chmod +x /usr/local/bin/cyclonedx + + - name: ๐Ÿงช Run E2E test with GitHub + run: ./clickbom + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPOSITORY: ${{ github.repository }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + S3_BUCKET: ${{ secrets.TEST_S3_BUCKET }} + S3_KEY: test-e2e-${{ github.sha }}.json + SBOM_SOURCE: github + SBOM_FORMAT: cyclonedx + + # Benchmarks + benchmark: + name: โšก Benchmarks + runs-on: ubuntu-latest + if: github.event_name == 'push' + + steps: + - name: ๐Ÿงพ Checkout + uses: actions/checkout@v5 + + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + cache: true + + - name: โšก Run benchmarks + run: go test -bench=. -benchmem -run=^$ ./... 
| tee benchmark.txt + + - name: ๐Ÿ“Š Store benchmark result + uses: benchmark-action/github-action-benchmark@v1 + with: + tool: 'go' + output-file-path: benchmark.txt + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + + # Dependency vulnerability scan + test_security: + name: ๐Ÿ”’ Security Scan runs-on: ubuntu-latest steps: - name: ๐Ÿงพ Checkout uses: actions/checkout@v5 - - name: โš™๏ธ Setup BATS - run: ./setup-bats.sh + - name: ๐Ÿ”ง Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.25' + cache: true - - name: ๐Ÿ“‹ Check Tests - run: ./run-tests.sh --setup + - name: ๐Ÿ” Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + scan-type: 'fs' + scan-ref: '.' + format: 'sarif' + output: 'trivy-results.sarif' - - name: ๐Ÿงช Run Simple Tests - run: ./run-tests.sh --simple + - name: ๐Ÿ“ค Upload Trivy results to GitHub Security + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: 'trivy-results.sarif' - - name: ๐Ÿงช Run Advanced Tests - run: ./run-tests.sh --advanced \ No newline at end of file + - name: ๐Ÿ” Run govulncheck + run: | + go install golang.org/x/vuln/cmd/govulncheck@latest + govulncheck ./... diff --git a/.gitignore b/.gitignore index 496ee2c..e43b0f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -.DS_Store \ No newline at end of file +.DS_Store diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..bab89ec --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,27 @@ +version: "2" +linters: + enable: + - errcheck + - govet + - ineffassign + - staticcheck + - unused + - misspell + - unconvert + - unparam + - goconst + - gocyclo + - gosec + - revive + settings: + gosec: + severity: high + confidence: high + +run: + timeout: 5m + tests: true + +issues: + max-issues-per-linter: 0 + max-same-issues: 0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a338da5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,99 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + # General file checks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: check-yaml + args: [--unsafe] # Allow custom YAML tags + - id: check-json + - id: check-toml + - id: check-added-large-files + args: [--maxkb=1000] + - id: check-case-conflict + - id: check-merge-conflict + - id: detect-private-key + - id: mixed-line-ending + - id: no-commit-to-branch + args: [--branch, main, --branch, master] + + # Go formatting + - repo: https://github.com/dnephin/pre-commit-golang + rev: v0.5.1 + hooks: + - id: go-fmt + name: Format Go code with gofmt + description: Runs `gofmt -s -w` on all Go files + + - id: go-imports + name: Format Go imports with goimports + description: Runs `goimports -w` on all Go files + args: [-local, github.com/ClickHouse/ClickBOM] + + - id: go-unit-tests + name: Go unit tests + description: Runs `go test` on all packages + args: [-short, -race] + + - id: go-build + name: Go build + description: Runs `go build` to ensure code compiles + + - id: go-mod-tidy + name: Tidy Go modules + description: Runs `go mod tidy` to clean up go.mod and go.sum + + # Additional Go checks + - repo: local + hooks: + - id: go-no-replacement + name: Check for replace directives in go.mod + entry: "bash -c 'if grep -q \"^replace \" go.mod; then echo \"Error: go.mod contains replace directives\"; exit 
1; fi'" + language: system + files: go\.mod$ + + - id: go-critic + name: Go critic + entry: gocritic check ./... + language: system + pass_filenames: false + files: \.go$ + + - id: go-cyclo + name: Check cyclomatic complexity + entry: gocyclo -over 25 . + language: system + pass_filenames: false + files: \.go$ + + - id: golangci-lint + name: golangci-lint + entry: golangci-lint run --fix + language: system + pass_filenames: false + files: \.go$ + + # YAML linting + - repo: https://github.com/adrienverge/yamllint + rev: v1.37.1 + hooks: + - id: yamllint + args: [-d, relaxed] + + # Dockerfile linting + - repo: https://github.com/hadolint/hadolint + rev: v2.14.0 + hooks: + - id: hadolint-docker + args: [--ignore, DL3018] # Ignore pinning versions in apk add + + # Commit message linting + - repo: https://github.com/compilerla/conventional-pre-commit + rev: v4.3.0 + hooks: + - id: conventional-pre-commit + stages: [commit-msg] diff --git a/Dockerfile b/Dockerfile index 6ee0af7..19bfc09 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,74 +1,75 @@ -FROM ubuntu:24.04 +# hadolint global ignore=DL3047,DL4001 +# Multi-stage build for Go application +FROM golang:1.25.3-alpine3.22 AS builder + +# Ensure base packages are up-to-date to pick up security fixes before installing build deps +RUN apk update && apk upgrade --available --no-cache -# Add metadata labels for better container management LABEL maintainer="ClickHouse Security Team" \ description="ClickBOM - SBOM Management Tool" \ - version="1.0.0" \ - security.scan="enabled" + version="2.0.0" -# Avoid interactive prompts during package installation -ENV DEBIAN_FRONTEND=noninteractive +# Install build dependencies +RUN apk add --no-cache \ + git \ + ca-certificates \ + tzdata -# Create a non-root user early in the build process -RUN groupadd -r clickbom && useradd -r -g clickbom -s /bin/false clickbom +WORKDIR /build -# Install required packages -RUN apt-get update && apt-get install -y \ - curl \ - jq \ - python3 \ - python3-pip \ - unzip \ - wget \ - ca-certificates \ - libicu74 \ - vim-common \ - file \ - && rm -rf /var/lib/apt/lists/* \ - && apt-get autoremove -y \ - && apt-get autoclean +# Copy go mod files +COPY go.mod go.sum ./ +RUN go mod download -# Install AWS CLI -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ - && unzip awscliv2.zip \ - && ./aws/install \ - && rm -rf awscliv2.zip aws/ +# Copy source code +COPY . . 
-# Install CycloneDX CLI (prebuilt binary) -RUN wget -O /usr/local/bin/cyclonedx "https://github.com/CycloneDX/cyclonedx-cli/releases/download/v0.27.2/cyclonedx-linux-x64" \ - && chmod +x /usr/local/bin/cyclonedx +# Build static binary +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags='-w -s -extldflags "-static"' \ + -a \ + -o clickbom \ + ./cmd/clickbom -# Create necessary directories with proper permissions -RUN mkdir -p /app /app/temp && \ - chown -R clickbom:clickbom /app +# External tools stage +FROM alpine:3.19 AS tools -# Set working directory -WORKDIR /app +# Install AWS CLI +RUN apk add --no-cache curl unzip && \ + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ + unzip awscliv2.zip && \ + ./aws/install -# Copy application files with proper ownership -COPY --chown=clickbom:clickbom entrypoint.sh /app/entrypoint.sh -COPY --chown=clickbom:clickbom lib/ /app/lib/ -COPY --chown=clickbom:clickbom license-mappings.json /app/license-mappings.json +# Install CycloneDX CLI +RUN wget -O /cyclonedx "https://github.com/CycloneDX/cyclonedx-cli/releases/download/v0.27.2/cyclonedx-linux-x64" && \ + chmod +x /cyclonedx -# Make entrypoint executable -RUN chmod +x /app/entrypoint.sh +# Runtime stage - Distroless +FROM gcr.io/distroless/static-debian12:nonroot -# Switch to non-root user -USER clickbom +LABEL maintainer="ClickHouse Security Team" \ + description="ClickBOM - SBOM Management Tool" \ + version="2.0.0" \ + security.scan="enabled" -# Set secure environment variables -ENV PATH="/usr/local/bin:$PATH" \ - TEMP_DIR="/app/temp" \ - PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 +# Copy from tools stage +COPY --from=tools /usr/local/aws-cli /usr/local/aws-cli +COPY --from=tools /usr/local/bin/aws /usr/local/bin/aws +COPY --from=tools /cyclonedx /usr/local/bin/cyclonedx + +# Copy the binary from builder +COPY --from=builder /build/clickbom /app/clickbom -# Health check to ensure the container is working properly -HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ - CMD ps aux | grep -v grep | grep -q entrypoint || exit 1 +# Copy license mappings +COPY license-mappings.json /app/license-mappings.json -# Use absolute path for entrypoint -ENTRYPOINT ["/app/entrypoint.sh"] +# Set working directory +WORKDIR /app + +# distroless runs as nonroot user by default (UID 65532) +# Set environment +ENV PATH="/usr/local/bin:$PATH" \ + TEMP_DIR="/tmp" -# Add security scanning metadata -LABEL security.trivy.enabled="true" \ - security.dockerfile.hadolint="true" +# Run the application +ENTRYPOINT ["/app/clickbom"] diff --git a/README.md b/README.md index dba816c..ab37dc9 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ on: push: branches: - main - + jobs: clickbom: name: ClickBOM @@ -158,7 +158,7 @@ on: push: branches: - main - + jobs: clickbom: name: ClickBOM @@ -205,7 +205,7 @@ on: push: branches: - main - + jobs: clickbom: name: ClickBOM @@ -260,7 +260,7 @@ on: push: branches: - main - + jobs: clickbom: strategy: @@ -324,7 +324,7 @@ on: push: branches: - main - + jobs: clickbom: strategy: @@ -376,7 +376,7 @@ jobs: clickhouse-database: ${{ secrets.CLICKHOUSE_DATABASE }} clickhouse-username: ${{ secrets.CLICKHOUSE_USERNAME }} clickhouse-password: ${{ secrets.CLICKHOUSE_PASSWORD }} - + clickbom_merge: needs: clickbom name: ClickBOM Merge @@ -430,7 +430,7 @@ on: push: branches: - main - + jobs: clickbom_merge: name: ClickBOM Merge with Filters @@ -492,7 +492,7 @@ on: push: branches: - main - + jobs: clickbom: name: ClickBOM 
diff --git a/cmd/clickbom/main.go b/cmd/clickbom/main.go new file mode 100644 index 0000000..c08aed8 --- /dev/null +++ b/cmd/clickbom/main.go @@ -0,0 +1,176 @@ +// Package main implements the ClickBOM GitHub Action for SBOM processing. +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/internal/sbom" + "github.com/ClickHouse/ClickBOM/internal/storage" + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +func main() { + if err := run(); err != nil { + logger.Fatal("Application error: %v", err) + } +} + +func run() error { + logger.Info("Starting ClickBOM GitHub Action for SBOM processing") + + // Load and validate configuration + cfg, err := config.LoadConfig() + if err != nil { + return fmt.Errorf("configuration error: %w", err) + } + + logger.SetDebug(cfg.Debug) + + ctx := context.Background() + + // Create temp directory + tempDir, err := os.MkdirTemp("", "clickbom-*") + if err != nil { + return fmt.Errorf("failed to create temp directory: %w", err) + } + defer func() { + if err := os.RemoveAll(tempDir); err != nil { + logger.Warning("Failed to remove temp directory: %v", err) + } + }() + + // Initialize S3 client + s3Client, err := storage.NewS3Client(ctx, cfg.AWSAccessKeyID, cfg.AWSSecretAccessKey, cfg.AWSRegion) + if err != nil { + return fmt.Errorf("failed to create S3 client: %w", err) + } + + if cfg.Merge { + return handleMergeMode(ctx, cfg, s3Client, tempDir) + } + + return handleNormalMode(ctx, cfg, s3Client, tempDir) +} + +func handleNormalMode(ctx context.Context, cfg *config.Config, s3Client *storage.S3Client, tempDir string) error { + logger.Info("Running in NORMAL mode - processing SBOM from %s", cfg.SBOMSource) + + originalSBOM := filepath.Join(tempDir, "original_sbom.json") + extractedSBOM := filepath.Join(tempDir, "extracted_sbom.json") + processedSBOM := filepath.Join(tempDir, "processed_sbom.json") + + // Download SBOM based on source + switch cfg.SBOMSource { + case "github": + logger.Info("Downloading SBOM from GitHub") + ghClient := sbom.NewGitHubClient(cfg.GitHubToken) + if err := ghClient.DownloadSBOM(ctx, cfg.Repository, originalSBOM); err != nil { + return fmt.Errorf("failed to download GitHub SBOM: %w", err) + } + + case "mend": + logger.Info("Downloading SBOM from Mend") + mendClient := sbom.NewMendClient(cfg) + if err := mendClient.RequestSBOMExport(ctx, originalSBOM); err != nil { + return fmt.Errorf("failed to download Mend SBOM: %w", err) + } + + case "wiz": + logger.Info("Downloading SBOM from Wiz") + wizClient := sbom.NewWizClient(cfg) + if err := wizClient.DownloadReport(ctx, originalSBOM); err != nil { + return fmt.Errorf("failed to download Wiz SBOM: %w", err) + } + + default: + return fmt.Errorf("unsupported SBOM source: %s", cfg.SBOMSource) + } + + // Extract from wrapper if needed + if err := sbom.ExtractSBOMFromWrapper(originalSBOM, extractedSBOM); err != nil { + return fmt.Errorf("failed to extract SBOM: %w", err) + } + + // Detect format + detectedFormat, err := sbom.DetectSBOMFormat(extractedSBOM) + if err != nil { + return fmt.Errorf("failed to detect SBOM format: %w", err) + } + logger.Info("Detected SBOM format: %s", detectedFormat) + + // Convert to desired format + targetFormat := sbom.Format(cfg.SBOMFormat) + if err := sbom.ConvertSBOM(extractedSBOM, processedSBOM, detectedFormat, targetFormat); err != nil { + return fmt.Errorf("failed to convert SBOM: %w", err) + } + + // Upload to S3 + if err := s3Client.Upload(ctx, 
processedSBOM, cfg.S3Bucket, cfg.S3Key, cfg.SBOMFormat); err != nil { + return fmt.Errorf("failed to upload to S3: %w", err) + } + + logger.Success("SBOM processing completed successfully!") + logger.Info("SBOM available at: s3://%s/%s", cfg.S3Bucket, cfg.S3Key) + + // ClickHouse operations + if cfg.ClickHouseURL != "" { + if err := handleClickHouse(ctx, cfg, processedSBOM); err != nil { + return fmt.Errorf("ClickHouse error: %w", err) + } + } + + return nil +} + +func handleMergeMode(_ context.Context, _ *config.Config, _ *storage.S3Client, _ string) error { + logger.Info("Running in MERGE mode - merging all CycloneDX SBOMs from S3") + + // Implementation for merge mode... + // This would involve downloading all SBOMs from S3, merging them, and uploading + + return nil +} + +func handleClickHouse(ctx context.Context, cfg *config.Config, sbomFile string) error { + logger.Info("Starting ClickHouse operations") + + chClient, err := storage.NewClickHouseClient(cfg) + if err != nil { + return err + } + + tableName := generateTableName(cfg) + + if err := chClient.SetupTable(ctx, tableName); err != nil { + return fmt.Errorf("failed to setup table: %w", err) + } + + if err := chClient.InsertSBOMData(ctx, sbomFile, tableName, cfg.SBOMFormat); err != nil { + return fmt.Errorf("failed to insert data: %w", err) + } + + logger.Success("ClickHouse operations completed successfully!") + return nil +} + +func generateTableName(cfg *config.Config) string { + switch cfg.SBOMSource { + case "github": + return strings.ReplaceAll(strings.ToLower(cfg.Repository), "/", "_") + case "mend": + uuid := cfg.MendProjectUUID + if uuid == "" { + uuid = cfg.MendProductUUID + } + return fmt.Sprintf("mend_%s", strings.ReplaceAll(uuid, "-", "_")) + case "wiz": + return fmt.Sprintf("wiz_%s", strings.ReplaceAll(cfg.WizReportID, "-", "_")) + default: + return "sbom_data" + } +} diff --git a/entrypoint.sh b/entrypoint.sh deleted file mode 100755 index 3fe54dc..0000000 --- a/entrypoint.sh +++ /dev/null @@ -1,578 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# Get the directory where this script is located -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Source all library files -source "$SCRIPT_DIR/lib/sanitize.sh" -source "$SCRIPT_DIR/lib/common.sh" -source "$SCRIPT_DIR/lib/validation.sh" -source "$SCRIPT_DIR/lib/github.sh" -source "$SCRIPT_DIR/lib/mend.sh" -source "$SCRIPT_DIR/lib/wiz.sh" -source "$SCRIPT_DIR/lib/sbom-processing.sh" -source "$SCRIPT_DIR/lib/sbom-merging.sh" - -# Upload to S3 -upload_to_s3() { - local local_file="$1" - local s3_bucket="$2" - local s3_key="$3" - local desired_format="${SBOM_FORMAT:-cyclonedx}" - - log_info "Uploading $desired_format SBOM to s3://$s3_bucket/$s3_key" - - if aws s3 cp "$local_file" "s3://$s3_bucket/$s3_key" \ - --content-type "application/json" \ - --metadata "format=$desired_format,source=github-action"; then - log_success "SBOM uploaded successfully to S3" - else - log_error "Failed to upload SBOM to S3" - exit 1 - fi -} - -# Extract source document reference from an SBOM -extract_sbom_source_reference() { - local sbom_file="$1" - local fallback_name="$2" # Fallback name (e.g., filename) - - log_debug "Extracting source reference from $(basename "$sbom_file")" - - # Try multiple strategies to extract the source reference - local source_ref="" - - # Strategy 1: Check for spdx:document:name in properties (GitHub SBOMs) - if source_ref=$(jq -r '.metadata.properties[]? 
| select(.name == "spdx:document:name") | .value' "$sbom_file" 2>/dev/null); then - if [[ -n "$source_ref" && "$source_ref" != "null" ]]; then - log_debug "Found SPDX document name: $source_ref" - echo "$source_ref" - return 0 - fi - fi - - # Strategy 2: Check metadata.component.name (Wiz/Mend SBOMs) - if source_ref=$(jq -r '.metadata.component.name // empty' "$sbom_file" 2>/dev/null); then - if [[ -n "$source_ref" && "$source_ref" != "null" ]]; then - log_debug "Found component name: $source_ref" - echo "$source_ref" - return 0 - fi - fi - - # Strategy 3: Check metadata.component.bom-ref (Mend SBOMs) - if source_ref=$(jq -r '.metadata.component."bom-ref" // empty' "$sbom_file" 2>/dev/null); then - if [[ -n "$source_ref" && "$source_ref" != "null" ]]; then - log_debug "Found bom-ref: $source_ref" - echo "$source_ref" - return 0 - fi - fi - - # Strategy 4: Check top-level name field - if source_ref=$(jq -r '.name // empty' "$sbom_file" 2>/dev/null); then - if [[ -n "$source_ref" && "$source_ref" != "null" ]]; then - log_debug "Found top-level name: $source_ref" - echo "$source_ref" - return 0 - fi - fi - - # Strategy 5: Check metadata.tools for document name hints - if source_ref=$(jq -r '.metadata.tools[]?.name // empty' "$sbom_file" 2>/dev/null | grep -v "GitHub.com-Dependency\|protobom\|CycloneDX\|cyclonedx-merge" | head -1); then - if [[ -n "$source_ref" && "$source_ref" != "null" ]]; then - log_debug "Found tool name hint: $source_ref" - echo "$source_ref" - return 0 - fi - fi - - # Strategy 6: Use fallback (usually filename without extension) - if [[ -n "$fallback_name" ]]; then - local clean_fallback=$(basename "$fallback_name" .json) - log_debug "Using fallback name: $clean_fallback" - echo "$clean_fallback" - return 0 - fi - - # Final fallback - log_warning "Could not extract source reference, using 'unknown'" - echo "unknown" - return 0 -} - -# Enhanced component collection with source tracking -collect_components_with_source() { - local sbom_file="$1" - local source_ref="$2" - local output_file="$3" - - log_debug "Collecting components from $(basename "$sbom_file") with source: $source_ref" - - # Extract components and add source reference to each - if jq -c --arg source "$source_ref" ' - .components[]? // empty | - . 
+ {"source": $source} - ' "$sbom_file" > "$output_file" 2>/dev/null; then - local component_count=$(wc -l < "$output_file" 2>/dev/null || echo "0") - log_debug "Collected $component_count components with source: $source_ref" - return 0 - else - log_warning "Failed to collect components from $(basename "$sbom_file")" - touch "$output_file" # Create empty file - return 1 - fi -} - -# Check if table needs migration for source column -check_and_migrate_table() { - local table_name="$1" - local clickhouse_url="$2" - local auth_params="$3" - - log_info "Checking if table $table_name needs migration for source column" - - # Check if source column exists - local column_exists - if column_exists=$(curl -s ${auth_params} --data "SELECT COUNT(*) FROM system.columns WHERE database='${CLICKHOUSE_DATABASE}' AND table='${table_name}' AND name='source'" "${clickhouse_url}"); then - if [[ "$column_exists" == "0" ]]; then - log_info "source column not found, migrating table: $table_name" - - # Add source column with default value - local alter_sql="ALTER TABLE ${CLICKHOUSE_DATABASE}.${table_name} ADD COLUMN source LowCardinality(String) DEFAULT 'unknown'" - - if curl -s ${auth_params} --data "$alter_sql" "${clickhouse_url}"; then - log_success "source column added to table $table_name" - return 0 - else - log_error "Failed to add source column to table $table_name" - return 1 - fi - else - log_info "source column already exists in table $table_name" - return 0 - fi - else - log_error "Failed to check column existence for table $table_name" - return 1 - fi -} - -# Set up ClickHouse table -setup_clickhouse_table() { - local table_name="$1" - - log_info "Setting up ClickHouse table: $table_name" - - # Build ClickHouse URL - local clickhouse_url="${CLICKHOUSE_URL}" - local auth_params="" - - # Use basic auth if username and password are provided - if [[ -n "${CLICKHOUSE_USERNAME:-}" ]] && [[ -n "${CLICKHOUSE_PASSWORD:-}" ]]; then - auth_params="-u ${CLICKHOUSE_USERNAME}:${CLICKHOUSE_PASSWORD}" - log_debug "Using basic auth with username: ${CLICKHOUSE_USERNAME}" - elif [[ -n "${CLICKHOUSE_USERNAME:-}" ]]; then - auth_params="-u ${CLICKHOUSE_USERNAME}:" - log_debug "Using basic auth with username only: ${CLICKHOUSE_USERNAME}" - else - log_debug "Using no authentication" - fi - - # Test connection first - log_debug "Testing ClickHouse connection..." - if [[ "${DEBUG:-false}" == "true" ]]; then - if ! curl -s ${auth_params} --data "SELECT 1" "${clickhouse_url}" > /dev/null; then - log_error "ClickHouse connection test failed" - log_error "Please verify your ClickHouse credentials and URL" - return 1 - fi - log_success "ClickHouse connection successful" - fi - - # Check if table exists - local table_exists - if table_exists=$(curl -s ${auth_params} --data "SELECT COUNT(*) FROM system.tables WHERE database='${CLICKHOUSE_DATABASE}' AND name='${table_name}'" "${clickhouse_url}"); then - if [[ "$table_exists" == "1" ]]; then - log_info "Table $table_name already exists" - - # Check and migrate table if needed - if ! 
check_and_migrate_table "$table_name" "$clickhouse_url" "$auth_params"; then - log_error "Table migration failed" - return 1 - fi - - if [[ "${TRUNCATE_TABLE:-false}" == "true" ]]; then - log_info "Truncating existing table: $table_name" - if curl -s ${auth_params} --data "TRUNCATE TABLE ${CLICKHOUSE_DATABASE}.${table_name}" "${clickhouse_url}"; then - log_success "Table $table_name truncated" - else - log_error "Failed to truncate table $table_name" - return 1 - fi - else - log_info "New data will be appended to existing table: $table_name" - fi - else - log_info "Creating new table: $table_name" - local create_table_sql=" - CREATE TABLE ${CLICKHOUSE_DATABASE}.${table_name} ( - name String, - version String, - license String, - source LowCardinality(String), - inserted_at DateTime DEFAULT now() - ) ENGINE = MergeTree() - ORDER BY (name, version, license); - " - - if curl -s ${auth_params} --data "$create_table_sql" "${clickhouse_url}"; then - log_success "Table $table_name created successfully" - else - log_error "Failed to create table $table_name" - return 1 - fi - fi - else - log_error "Failed to check if table exists" - return 1 - fi - return 0 -} - -map_unknown_licenses() { - local input_file="$1" - local output_file="$2" - - log_info "Mapping unknown licenses using JSON mappings" - - # Convert JSON to TSV temporarily - local mappings_tsv="$temp_dir/mappings.tsv" - jq -r 'to_entries[] | [.key, .value] | @tsv' /app/license-mappings.json > "$mappings_tsv" - - # Use awk to apply mappings - awk -F'\t' ' - BEGIN { OFS="\t" } - NR==FNR { licenses[$1] = $2; next } - { - name = $1; version = $2; license = $3; source = $4; - if (license == "unknown" || license == "" || license == "null") { - if (name in licenses) license = licenses[name] - } - print name, version, license, source - } - ' "$mappings_tsv" "$input_file" > "$output_file" - - log_success "License mapping completed" -} - -insert_sbom_data() { - local sbom_file="$1" - local table_name="$2" - local sbom_format="$3" - - log_info "Extracting components from $sbom_format SBOM for ClickHouse" - - # Build ClickHouse URL - local clickhouse_url="${CLICKHOUSE_URL}" - local auth_params="" - - # Use basic auth if username and password are provided - if [[ -n "${CLICKHOUSE_USERNAME:-}" ]] && [[ -n "${CLICKHOUSE_PASSWORD:-}" ]]; then - auth_params="-u ${CLICKHOUSE_USERNAME}:${CLICKHOUSE_PASSWORD}" - log_info "Using basic auth with username: ${CLICKHOUSE_USERNAME}" - elif [[ -n "${CLICKHOUSE_USERNAME:-}" ]]; then - auth_params="-u ${CLICKHOUSE_USERNAME}:" - log_info "Using basic auth with username only: ${CLICKHOUSE_USERNAME}" - fi - - # Determine source value based on context - local default_source_value="unknown" - local sbom_source="${SBOM_SOURCE:-github}" - local merge_mode="${MERGE:-false}" - - if [[ "$merge_mode" != "true" ]]; then - # For non-merged SBOMs, determine source from context - case "$sbom_source" in - "github") - default_source_value="${REPOSITORY:-unknown}" - ;; - "mend") - default_source_value="mend:${MEND_PROJECT_UUID:-${MEND_PRODUCT_UUID:-${MEND_ORG_SCOPE_UUID:-unknown}}}" - ;; - "wiz") - default_source_value="wiz:${WIZ_REPORT_ID:-unknown}" - ;; - *) - default_source_value="$sbom_source" - ;; - esac - fi - - log_info "Source value for ClickHouse: $default_source_value" - - # Create temporary file for data - local data_file="$temp_dir/clickhouse_data.tsv" - local mapped_data_file="$temp_dir/clickhouse_data_mapped.tsv" - - # Extract data based on SBOM format - case "$sbom_format" in - "cyclonedx") - log_debug "Sample CycloneDX 
component with license:" - if [[ "${DEBUG:-false}" == "true" ]]; then - jq -r '.components[0] | {name: .name, version: .version, licenses: .licenses}' "$sbom_file" 2>/dev/null || echo "No components found" - fi - # Enhanced extraction to handle component-level source references - jq -r --arg default_source "$default_source_value" ' - .components[]? // empty | - [ - .name // "unknown", - .version // "unknown", - ( - # Try to extract license from multiple sources - ( - # First: Try standard CycloneDX licenses array with content - if (.licenses | length) > 0 and (.licenses[0] | keys | length) > 0 then - .licenses[0] | (.license.id // .license.name // .id // .name // .expression) - else - null - end - ) // - ( - # Second: Try SPDX properties for license-concluded - if (.properties | length) > 0 then - (.properties[] | select(.name == "spdx:license-concluded") | .value) - else - null - end - ) // - ( - # Third: Try SPDX properties for license-declared - if (.properties | length) > 0 then - (.properties[] | select(.name == "spdx:license-declared") | .value) - else - null - end - ) // - # Final fallback - "unknown" - ), - ( - # Use component-level source if available, otherwise use default - .source // $default_source - ) - ] | @tsv - ' "$sbom_file" > "$data_file" - ;; - "spdxjson") - # Extract from SPDX format - # SPDX format doesn't have component-level source in merged SBOMs - # so always use the default source - jq -r --arg default_source "$default_source_value" ' - .packages[]? // empty | - select(.name != null) | - [ - .name // "unknown", - .versionInfo // "unknown", - (.licenseConcluded // .licenseDeclared // "unknown"), - ( - $default_source - ) - ] | @tsv - ' "$sbom_file" > "$data_file" - ;; - *) - log_error "Unsupported SBOM format for ClickHouse: $sbom_format" - return 1 - ;; - esac - - # Check if we have data to insert - if [[ ! -s "$data_file" ]]; then - log_warning "No component data found in SBOM" - return - fi - - # Map unknown licenses - map_unknown_licenses "$data_file" "$mapped_data_file" - - local component_count=$(wc -l < "$mapped_data_file") - log_info "Found $component_count components to insert (with license mapping and source tracking applied)" - - # Insert data into ClickHouse - if curl -s ${auth_params} \ - -H "Content-Type: text/tab-separated-values" \ - --data-binary "@$mapped_data_file" \ - "${clickhouse_url}/?query=INSERT%20INTO%20${CLICKHOUSE_DATABASE}.${table_name}%20(name,%20version,%20license,%20source)%20FORMAT%20TSV"; then - log_success "Inserted $component_count components with source tracking into ClickHouse table $table_name" - return 0 - else - log_error "Failed to insert data into ClickHouse" - return 1 - fi -} - -# Global variable for temp directory (so cleanup can access it) -temp_dir="" - -# Main function -main() { - log_info "Starting ClickBOM GitHub Action for SBOM processing" - - # Sanitize inputs - sanitize_inputs - - # Validate environment - validate_env - validate_mend_env - validate_wiz_env - - # Set defaults for optional variables - local s3_key="${S3_KEY:-sbom.json}" - local desired_format="${SBOM_FORMAT:-cyclonedx}" - local merge_mode="${MERGE:-false}" - local sbom_source="${SBOM_SOURCE:-github}" - - # Set up cleanup trap - trap cleanup EXIT - - # Temporary files - if ! 
temp_dir=$(mktemp -d); then - log_error "Failed to create temporary directory" - exit 1 - fi - - if [[ "$merge_mode" == "true" ]]; then - log_info "Running in MERGE mode - merging all CycloneDX SBOMs from S3" - - local merged_sbom="$temp_dir/merged_sbom.json" - - # Merge all CycloneDX SBOMs from S3 - merge_cyclonedx_sboms "$merged_sbom" - - # Validate the merged file - if ! jq . "$merged_sbom" > /dev/null 2>&1; then - log_error "Merged CycloneDX SBOM is not valid JSON" - exit 1 - fi - - # Upload merged SBOM back to S3 - upload_to_s3 "$merged_sbom" "$S3_BUCKET" "$s3_key" - - log_success "SBOM merging and upload completed successfully!" - log_info "Merged SBOM available at: s3://$S3_BUCKET/$s3_key" - - # ClickHouse operations - if [[ -n "${CLICKHOUSE_URL:-}" ]]; then - local table_name=$(echo "$s3_key" | sed 's|[^a-zA-Z0-9]|_|g' | sed 's|\.json|_merged|g' | tr '[:upper:]' '[:lower:]') - log_info "Starting ClickHouse operations for table: $table_name" - # Setup table with error handling - if ! setup_clickhouse_table "$table_name"; then - log_error "ClickHouse table setup failed, skipping data insertion" - exit 1 - else - # Insert SBOM data into ClickHouse - if ! insert_sbom_data "$merged_sbom" "$table_name" "$desired_format"; then - log_error "Failed to insert SBOM data into ClickHouse" - exit 1 - else - log_info "Component data available in ClickHouse table: ${CLICKHOUSE_DATABASE}.${table_name}" - log_success "ClickHouse operations completed successfully!" - fi - fi - fi - exit 0 - else - log_info "Running in NORMAL mode - processing SBOM from $sbom_source" - - local original_sbom="$temp_dir/original_sbom.json" - local extracted_sbom="$temp_dir/extracted_sbom.json" - local fixed_sbom="$temp_dir/fixed_sbom.json" - local processed_sbom="$temp_dir/processed_sbom.json" - - # Download SBOM based on source - case "$sbom_source" in - "github") - log_info "Downloading SBOM from GitHub" - download_sbom "$REPOSITORY" "$original_sbom" - ;; - "mend") - log_info "Downloading SBOM from Mend" - request_mend_sbom_export "$original_sbom" - ;; - "wiz") - log_info "Downloading SBOM from Wiz" - download_wiz_report "$original_sbom" - ;; - *) - log_error "Unsupported SBOM source: $sbom_source" - log_error "Supported sources: github, mend" - exit 1 - ;; - esac - - # Extract SBOM from wrapper if needed - extract_sbom_from_wrapper "$original_sbom" "$extracted_sbom" - - # Detect format - local detected_format - detected_format=$(detect_sbom_format "$extracted_sbom") - log_info "Detected SBOM format: $detected_format" - - # Fix SPDX compatibility issues if needed - if [[ "$detected_format" == "spdxjson" ]]; then - fix_spdx_compatibility "$extracted_sbom" "$fixed_sbom" - convert_sbom "$fixed_sbom" "$processed_sbom" "$detected_format" "$desired_format" - else - convert_sbom "$extracted_sbom" "$processed_sbom" "$detected_format" "$desired_format" - fi - - # Validate the converted file - if ! jq . "$processed_sbom" > /dev/null 2>&1; then - log_error "Generated CycloneDX SBOM is not valid JSON" - exit 1 - fi - - # Upload to S3 - upload_to_s3 "$processed_sbom" "$S3_BUCKET" "$s3_key" - - log_success "SBOM processing completed successfully!" 
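The deleted upload_to_s3 function above pushed the processed SBOM with `aws s3 cp --content-type application/json --metadata format=...,source=github-action`. In the Go rewrite, cmd/clickbom/main.go calls `s3Client.Upload(ctx, processedSBOM, cfg.S3Bucket, cfg.S3Key, cfg.SBOMFormat)`, but internal/storage/s3.go itself is not shown in this hunk. The sketch below only illustrates how such an Upload method could preserve the same content type and metadata via aws-sdk-go-v2; the struct shape and field names are assumptions, not the actual implementation.

package storage

import (
	"context"
	"fmt"
	"os"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

// S3Client wraps the AWS SDK client (hypothetical shape; the real type lives in
// internal/storage/s3.go, which is not part of this excerpt).
type S3Client struct {
	client *s3.Client
}

// Upload mirrors the old `aws s3 cp` call: JSON content type plus
// format/source metadata on the uploaded SBOM object.
func (c *S3Client) Upload(ctx context.Context, localFile, bucket, key, format string) error {
	f, err := os.Open(localFile)
	if err != nil {
		return fmt.Errorf("open %s: %w", localFile, err)
	}
	defer f.Close()

	_, err = c.client.PutObject(ctx, &s3.PutObjectInput{
		Bucket:      aws.String(bucket),
		Key:         aws.String(key),
		Body:        f,
		ContentType: aws.String("application/json"),
		Metadata: map[string]string{
			"format": format,
			"source": "github-action",
		},
	})
	if err != nil {
		return fmt.Errorf("upload to s3://%s/%s: %w", bucket, key, err)
	}
	return nil
}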
- log_info "SBOM available at: s3://$S3_BUCKET/$s3_key" - - # ClickHouse operations - if [[ -n "${CLICKHOUSE_URL:-}" ]]; then - local table_name - case "$sbom_source" in - "github") - table_name=$(echo "$REPOSITORY" | sed 's|[^a-zA-Z0-9]|_|g' | tr '[:upper:]' '[:lower:]') - ;; - "mend") - table_name="mend_$(echo "${MEND_PROJECT_UUID:-${MEND_PRODUCT_UUID:-${MEND_ORG_SCOPE_UUID}}}" | sed 's|[^a-zA-Z0-9]|_|g' | tr '[:upper:]' '[:lower:]')" - ;; - "wiz") - table_name="wiz_$(echo "${WIZ_REPORT_ID}" | sed 's|[^a-zA-Z0-9]|_|g' | tr '[:upper:]' '[:lower:]')" - ;; - esac - log_info "Starting ClickHouse operations for table: $table_name" - # Setup table with error handling - if ! setup_clickhouse_table "$table_name"; then - log_error "ClickHouse table setup failed, skipping data insertion" - exit 1 - else - # Insert SBOM data into ClickHouse - if ! insert_sbom_data "$processed_sbom" "$table_name" "$desired_format"; then - log_error "Failed to insert SBOM data into ClickHouse" - exit 1 - else - log_info "Component data available in ClickHouse table: ${CLICKHOUSE_DATABASE}.${table_name}" - log_success "ClickHouse operations completed successfully!" - fi - fi - fi - fi -} - -# Run main function -main "$@" diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4e8896c --- /dev/null +++ b/go.mod @@ -0,0 +1,27 @@ +module github.com/ClickHouse/ClickBOM + +go 1.25.3 + +require ( + github.com/aws/aws-sdk-go-v2 v1.39.4 + github.com/aws/aws-sdk-go-v2/config v1.31.15 + github.com/aws/aws-sdk-go-v2/credentials v1.18.19 + github.com/aws/aws-sdk-go-v2/service/s3 v1.88.7 +) + +require ( + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.11 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.11 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.11 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.11 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.11 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.11 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.29.8 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.38.9 // indirect + github.com/aws/smithy-go v1.23.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6e4e000 --- /dev/null +++ b/go.sum @@ -0,0 +1,36 @@ +github.com/aws/aws-sdk-go-v2 v1.39.4 h1:qTsQKcdQPHnfGYBBs+Btl8QwxJeoWcOcPcixK90mRhg= +github.com/aws/aws-sdk-go-v2 v1.39.4/go.mod h1:yWSxrnioGUZ4WVv9TgMrNUeLV3PFESn/v+6T/Su8gnM= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 h1:t9yYsydLYNBk9cJ73rgPhPWqOh/52fcWDQB5b1JsKSY= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2/go.mod h1:IusfVNTmiSN3t4rhxWFaBAqn+mcNdwKtPcV16eYdgko= +github.com/aws/aws-sdk-go-v2/config v1.31.15 h1:gE3M4xuNXfC/9bG4hyowGm/35uQTi7bUKeYs5e/6uvU= +github.com/aws/aws-sdk-go-v2/config v1.31.15/go.mod h1:HvnvGJoE2I95KAIW8kkWVPJ4XhdrlvwJpV6pEzFQa8o= +github.com/aws/aws-sdk-go-v2/credentials v1.18.19 h1:Jc1zzwkSY1QbkEcLujwqRTXOdvW8ppND3jRBb/VhBQc= +github.com/aws/aws-sdk-go-v2/credentials v1.18.19/go.mod h1:DIfQ9fAk5H0pGtnqfqkbSIzky82qYnGvh06ASQXXg6A= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds 
v1.18.11 h1:X7X4YKb+c0rkI6d4uJ5tEMxXgCZ+jZ/D6mvkno8c8Uw= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.11/go.mod h1:EqM6vPZQsZHYvC4Cai35UDg/f5NCEU+vp0WfbVqVcZc= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.11 h1:7AANQZkF3ihM8fbdftpjhken0TP9sBzFbV/Ze/Y4HXA= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.11/go.mod h1:NTF4QCGkm6fzVwncpkFQqoquQyOolcyXfbpC98urj+c= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.11 h1:ShdtWUZT37LCAA4Mw2kJAJtzaszfSHFb5n25sdcv4YE= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.11/go.mod h1:7bUb2sSr2MZ3M/N+VyETLTQtInemHXb/Fl3s8CLzm0Y= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.11 h1:bKgSxk1TW//00PGQqYmrq83c+2myGidEclp+t9pPqVI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.11/go.mod h1:vrPYCQ6rFHL8jzQA8ppu3gWX18zxjLIDGTeqDxkBmSI= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 h1:xtuxji5CS0JknaXoACOunXOYOQzgfTvGAc9s2QdCJA4= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2/go.mod h1:zxwi0DIR0rcRcgdbl7E2MSOvxDyyXGBlScvBkARFaLQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.2 h1:DGFpGybmutVsCuF6vSuLZ25Vh55E3VmsnJmFfjeBx4M= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.2/go.mod h1:hm/wU1HDvXCFEDzOLorQnZZ/CVvPXvWEmHMSmqgQRuA= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.11 h1:GpMf3z2KJa4RnJ0ew3Hac+hRFYLZ9DDjfgXjuW+pB54= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.11/go.mod h1:6MZP3ZI4QQsgUCFTwMZA2V0sEriNQ8k2hmoHF3qjimQ= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.11 h1:weapBOuuFIBEQ9OX/NVW3tFQCvSutyjZYk/ga5jDLPo= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.11/go.mod h1:3C1gN4FmIVLwYSh8etngUS+f1viY6nLCDVtZmrFbDy0= +github.com/aws/aws-sdk-go-v2/service/s3 v1.88.7 h1:Wer3W0GuaedWT7dv/PiWNZGSQFSTcBY2rZpbiUp5xcA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.88.7/go.mod h1:UHKgcRSx8PVtvsc1Poxb/Co3PD3wL7P+f49P0+cWtuY= +github.com/aws/aws-sdk-go-v2/service/sso v1.29.8 h1:M5nimZmugcZUO9wG7iVtROxPhiqyZX6ejS1lxlDPbTU= +github.com/aws/aws-sdk-go-v2/service/sso v1.29.8/go.mod h1:mbef/pgKhtKRwrigPPs7SSSKZgytzP8PQ6P6JAAdqyM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.3 h1:S5GuJZpYxE0lKeMHKn+BRTz6PTFpgThyJ+5mYfux7BM= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.3/go.mod h1:X4OF+BTd7HIb3L+tc4UlWHVrpgwZZIVENU15pRDVTI0= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.9 h1:Ekml5vGg6sHSZLZJQJagefnVe6PmqC2oiRkBq4F7fU0= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.9/go.mod h1:/e15V+o1zFHWdH3u7lpI3rVBcxszktIKuHKCY2/py+k= +github.com/aws/smithy-go v1.23.1 h1:sLvcH6dfAFwGkHLZ7dGiYF7aK6mg4CgKA/iDKjLDt9M= +github.com/aws/smithy-go v1.23.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..6e3e7fe --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,315 @@ +// Package config handles loading and validating configuration from environment variables. +package config + +import ( + "fmt" + "os" + + "github.com/ClickHouse/ClickBOM/internal/validation" +) + +// Config holds the application configuration. 
+type Config struct { + // GitHub + GitHubToken string + Repository string + + // Mend + MendEmail string + MendOrgUUID string + MendUserKey string + MendBaseURL string + MendProjectUUID string + MendProductUUID string + MendOrgScopeUUID string + MendProjectUUIDs string + MendMaxWaitTime int + MendPollInterval int + + // Wiz + WizAuthEndpoint string + WizAPIEndpoint string + WizClientID string + WizClientSecret string + WizReportID string + + // AWS + AWSAccessKeyID string + AWSSecretAccessKey string + AWSRegion string + S3Bucket string + S3Key string + + // ClickHouse + ClickHouseURL string + ClickHouseDatabase string + ClickHouseUsername string + ClickHousePassword string + TruncateTable bool + + // General + SBOMSource string // "github", "mend", "wiz" + SBOMFormat string // "cyclonedx", "spdxjson" + Merge bool + Include string + Exclude string + Debug bool +} + +// LoadConfig loads configuration from environment variables. +func LoadConfig() (*Config, error) { + cfg := &Config{ + // AWS (required) + AWSAccessKeyID: os.Getenv("AWS_ACCESS_KEY_ID"), + AWSSecretAccessKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), + AWSRegion: getEnvOrDefault("AWS_DEFAULT_REGION", "us-east-1"), + S3Bucket: os.Getenv("S3_BUCKET"), + S3Key: getEnvOrDefault("S3_KEY", "sbom.json"), + + // GitHub + GitHubToken: os.Getenv("GITHUB_TOKEN"), + Repository: os.Getenv("REPOSITORY"), + + // Mend + MendEmail: os.Getenv("MEND_EMAIL"), + MendOrgUUID: os.Getenv("MEND_ORG_UUID"), + MendUserKey: os.Getenv("MEND_USER_KEY"), + MendBaseURL: getEnvOrDefault("MEND_BASE_URL", "https://api-saas.mend.io"), + MendProjectUUID: os.Getenv("MEND_PROJECT_UUID"), + MendProductUUID: os.Getenv("MEND_PRODUCT_UUID"), + MendOrgScopeUUID: os.Getenv("MEND_ORG_SCOPE_UUID"), + MendProjectUUIDs: os.Getenv("MEND_PROJECT_UUIDS"), + MendMaxWaitTime: getEnvAsInt("MEND_MAX_WAIT_TIME", 1800), + MendPollInterval: getEnvAsInt("MEND_POLL_INTERVAL", 30), + + // Wiz + WizAuthEndpoint: os.Getenv("WIZ_AUTH_ENDPOINT"), + WizAPIEndpoint: os.Getenv("WIZ_API_ENDPOINT"), + WizClientID: os.Getenv("WIZ_CLIENT_ID"), + WizClientSecret: os.Getenv("WIZ_CLIENT_SECRET"), + WizReportID: os.Getenv("WIZ_REPORT_ID"), + + // ClickHouse + ClickHouseURL: os.Getenv("CLICKHOUSE_URL"), + ClickHouseDatabase: getEnvOrDefault("CLICKHOUSE_DATABASE", "default"), + ClickHouseUsername: getEnvOrDefault("CLICKHOUSE_USERNAME", "default"), + ClickHousePassword: os.Getenv("CLICKHOUSE_PASSWORD"), + TruncateTable: getEnvAsBool("TRUNCATE_TABLE", false), + + // General + SBOMSource: getEnvOrDefault("SBOM_SOURCE", "github"), + SBOMFormat: getEnvOrDefault("SBOM_FORMAT", "cyclonedx"), + Merge: getEnvAsBool("MERGE", false), + Include: os.Getenv("INCLUDE"), + Exclude: os.Getenv("EXCLUDE"), + Debug: getEnvAsBool("DEBUG", false), + } + + // Sanitize inputs + if err := cfg.Sanitize(); err != nil { + return nil, fmt.Errorf("sanitization failed: %w", err) + } + + // Validate required fields + if err := cfg.Validate(); err != nil { + return nil, fmt.Errorf("validation failed: %w", err) + } + + return cfg, nil +} + +// Validate checks that all required configuration fields are set appropriately. 
+func (c *Config) Validate() error { + // AWS is always required + if c.AWSAccessKeyID == "" { + return fmt.Errorf("AWS_ACCESS_KEY_ID is required") + } + if c.AWSSecretAccessKey == "" { + return fmt.Errorf("AWS_SECRET_ACCESS_KEY is required") + } + if c.S3Bucket == "" { + return fmt.Errorf("S3_BUCKET is required") + } + + // Repository required if not in merge mode and source is GitHub + if !c.Merge && c.SBOMSource != "mend" && c.SBOMSource != "wiz" { + if c.Repository == "" { + return fmt.Errorf("REPOSITORY is required when not in merge mode") + } + } + + // Mend validation + if c.SBOMSource == "mend" { + if c.MendEmail == "" { + return fmt.Errorf("MEND_EMAIL is required for Mend source") + } + if c.MendOrgUUID == "" { + return fmt.Errorf("MEND_ORG_UUID is required for Mend source") + } + if c.MendUserKey == "" { + return fmt.Errorf("MEND_USER_KEY is required for Mend source") + } + if c.MendProjectUUID == "" && c.MendProductUUID == "" { + return fmt.Errorf("at least one of MEND_PROJECT_UUID or MEND_PRODUCT_UUID is required") + } + } + + // Wiz validation + if c.SBOMSource == "wiz" { + if c.WizAPIEndpoint == "" { + return fmt.Errorf("WIZ_API_ENDPOINT is required for Wiz source") + } + if c.WizClientID == "" { + return fmt.Errorf("WIZ_CLIENT_ID is required for Wiz source") + } + if c.WizClientSecret == "" { + return fmt.Errorf("WIZ_CLIENT_SECRET is required for Wiz source") + } + if c.WizReportID == "" { + return fmt.Errorf("WIZ_REPORT_ID is required for Wiz source") + } + } + + // ClickHouse validation + if c.ClickHouseURL != "" { + if c.ClickHouseDatabase == "" { + return fmt.Errorf("CLICKHOUSE_DATABASE is required when using ClickHouse") + } + if c.ClickHouseUsername == "" { + return fmt.Errorf("CLICKHOUSE_USERNAME is required when using ClickHouse") + } + } + + return nil +} + +func getEnvOrDefault(key, defaultVal string) string { + if val := os.Getenv(key); val != "" { + return val + } + return defaultVal +} + +func getEnvAsInt(key string, defaultVal int) int { + valStr := os.Getenv(key) + if valStr == "" { + return defaultVal + } + var val int + _, err := fmt.Sscanf(valStr, "%d", &val) + if err != nil { + return defaultVal + } + return val +} + +func getEnvAsBool(key string, defaultVal bool) bool { + valStr := os.Getenv(key) + if valStr == "" { + return defaultVal + } + return valStr == "true" +} + +// Sanitize cleans and validates configuration fields. 
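+// Repository, email, S3, URL, and UUID values are normalized through the validation package, include/exclude patterns are cleaned, and free-form secrets are truncated to conservative maximum lengths.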
+func (c *Config) Sanitize() error { + var err error + + // Repository + if c.Repository != "" { + c.Repository, err = validation.SanitizeRepository(c.Repository) + if err != nil { + return err + } + } + + // Email + if c.MendEmail != "" { + c.MendEmail, err = validation.SanitizeEmail(c.MendEmail) + if err != nil { + return err + } + } + + // S3 + if c.S3Bucket != "" { + c.S3Bucket, err = validation.SanitizeS3Bucket(c.S3Bucket) + if err != nil { + return err + } + } + + if c.S3Key != "" { + c.S3Key, err = validation.SanitizeS3Key(c.S3Key) + if err != nil { + return err + } + } + + // URLs + if c.MendBaseURL != "" { + c.MendBaseURL, err = validation.SanitizeURL(c.MendBaseURL, "mend") + if err != nil { + return err + } + } + + if c.WizAuthEndpoint != "" { + c.WizAuthEndpoint, err = validation.SanitizeURL(c.WizAuthEndpoint, "wiz") + if err != nil { + return err + } + } + + if c.WizAPIEndpoint != "" { + c.WizAPIEndpoint, err = validation.SanitizeURL(c.WizAPIEndpoint, "wiz") + if err != nil { + return err + } + } + + if c.ClickHouseURL != "" { + c.ClickHouseURL, err = validation.SanitizeURL(c.ClickHouseURL, "clickhouse") + if err != nil { + return err + } + } + + // UUIDs + if c.MendOrgUUID != "" { + c.MendOrgUUID, err = validation.SanitizeUUID(c.MendOrgUUID, "MEND_ORG_UUID") + if err != nil { + return err + } + } + + if c.MendProjectUUID != "" { + c.MendProjectUUID, err = validation.SanitizeUUID(c.MendProjectUUID, "MEND_PROJECT_UUID") + if err != nil { + return err + } + } + + if c.MendProductUUID != "" { + c.MendProductUUID, err = validation.SanitizeUUID(c.MendProductUUID, "MEND_PRODUCT_UUID") + if err != nil { + return err + } + } + + // Patterns + c.Include = validation.SanitizePatterns(c.Include) + c.Exclude = validation.SanitizePatterns(c.Exclude) + + // Sanitize strings with length limits + c.GitHubToken = validation.SanitizeString(c.GitHubToken, 1000) + c.MendUserKey = validation.SanitizeString(c.MendUserKey, 500) + c.WizClientID = validation.SanitizeString(c.WizClientID, 200) + c.WizClientSecret = validation.SanitizeString(c.WizClientSecret, 500) + c.WizReportID = validation.SanitizeString(c.WizReportID, 200) + c.AWSAccessKeyID = validation.SanitizeString(c.AWSAccessKeyID, 100) + c.AWSSecretAccessKey = validation.SanitizeString(c.AWSSecretAccessKey, 500) + c.ClickHousePassword = validation.SanitizeString(c.ClickHousePassword, 500) + + return nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go new file mode 100644 index 0000000..bb617cd --- /dev/null +++ b/internal/config/config_test.go @@ -0,0 +1,128 @@ +package config + +import ( + "os" + "testing" +) + +func TestLoadConfig(t *testing.T) { + tests := []struct { + name string + env map[string]string + wantErr bool + }{ + { + name: "valid minimal config", + env: map[string]string{ + "AWS_ACCESS_KEY_ID": "test-key", + "AWS_SECRET_ACCESS_KEY": "test-secret", + "S3_BUCKET": "test-bucket", + "REPOSITORY": "owner/repo", + }, + wantErr: false, + }, + { + name: "missing required field", + env: map[string]string{ + "AWS_ACCESS_KEY_ID": "test-key", + // Missing AWS_SECRET_ACCESS_KEY + "S3_BUCKET": "test-bucket", + "REPOSITORY": "owner/repo", + }, + wantErr: true, + }, + { + name: "invalid repository format", + env: map[string]string{ + "AWS_ACCESS_KEY_ID": "test-key", + "AWS_SECRET_ACCESS_KEY": "test-secret", + "S3_BUCKET": "test-bucket", + "REPOSITORY": "invalid-repo", // No slash + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Clear environment + 
os.Clearenv() + + // Set test environment + for k, v := range tt.env { + err := os.Setenv(k, v) + if err != nil { + t.Fatalf("Failed to set env var %s: %v", k, err) + } + } + + cfg, err := LoadConfig() + + if (err != nil) != tt.wantErr { + t.Errorf("LoadConfig() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !tt.wantErr && cfg == nil { + t.Error("LoadConfig() returned nil config") + } + }) + } +} + +func TestConfigValidate(t *testing.T) { + tests := []struct { + name string + config *Config + wantErr bool + }{ + { + name: "valid github config", + config: &Config{ + AWSAccessKeyID: "key", + AWSSecretAccessKey: "secret", + S3Bucket: "bucket", + Repository: "owner/repo", + SBOMSource: "github", + }, + wantErr: false, + }, + { + name: "valid mend config", + config: &Config{ + AWSAccessKeyID: "key", + AWSSecretAccessKey: "secret", + S3Bucket: "bucket", + SBOMSource: "mend", + MendEmail: "test@example.com", + MendOrgUUID: "123e4567-e89b-12d3-a456-426614174000", + MendUserKey: "user-key", + MendProjectUUID: "123e4567-e89b-12d3-a456-426614174001", + }, + wantErr: false, + }, + { + name: "invalid mend config - missing email", + config: &Config{ + AWSAccessKeyID: "key", + AWSSecretAccessKey: "secret", + S3Bucket: "bucket", + SBOMSource: "mend", + // Missing MendEmail + MendOrgUUID: "123e4567-e89b-12d3-a456-426614174000", + MendUserKey: "user-key", + MendProjectUUID: "123e4567-e89b-12d3-a456-426614174001", + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.config.Validate() + if (err != nil) != tt.wantErr { + t.Errorf("Config.Validate() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/internal/sbom/filter.go b/internal/sbom/filter.go new file mode 100644 index 0000000..7dda0c5 --- /dev/null +++ b/internal/sbom/filter.go @@ -0,0 +1,94 @@ +// Package sbom provides functionalities for filtering files for SBOM generation. +package sbom + +import ( + "path/filepath" + "strings" + + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// FileFilter defines inclusion and exclusion patterns for filtering files. +type FileFilter struct { + Include []string + Exclude []string +} + +// NewFileFilter creates a new FileFilter with the given include and exclude patterns. +func NewFileFilter(include, exclude string) *FileFilter { + return &FileFilter{ + Include: parsePatterns(include), + Exclude: parsePatterns(exclude), + } +} + +func parsePatterns(patterns string) []string { + if patterns == "" { + return nil + } + + parts := strings.Split(patterns, ",") + var result []string + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + result = append(result, p) + } + } + return result +} + +// MatchesPattern checks if the filename matches any of the provided patterns. +func (f *FileFilter) MatchesPattern(filename string, patterns []string) bool { + if len(patterns) == 0 { + return false + } + + for _, pattern := range patterns { + matched, err := filepath.Match(pattern, filename) + if err != nil { + logger.Warning("Invalid pattern %s: %v", pattern, err) + continue + } + if matched { + return true + } + } + return false +} + +// ShouldInclude determines if a file should be included based on the filter rules. 
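+// Include patterns are evaluated first; a file that matches any exclude pattern is then rejected.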
+func (f *FileFilter) ShouldInclude(filename string) bool { + // If include patterns specified, file must match at least one + if len(f.Include) > 0 { + if !f.MatchesPattern(filename, f.Include) { + return false + } + } + + // If exclude patterns specified and file matches, exclude it + if len(f.Exclude) > 0 { + if f.MatchesPattern(filename, f.Exclude) { + return false + } + } + + return true +} + +// FilterFiles filters the given list of files based on the FileFilter rules. +func (f *FileFilter) FilterFiles(files []string) []string { + var filtered []string + + for _, file := range files { + filename := filepath.Base(file) + if f.ShouldInclude(filename) { + filtered = append(filtered, file) + } else { + logger.Debug("Filtered out: %s", filename) + } + } + + logger.Info("Filtered %d files to %d files", len(files), len(filtered)) + return filtered +} diff --git a/internal/sbom/github.go b/internal/sbom/github.go new file mode 100644 index 0000000..eb05b6e --- /dev/null +++ b/internal/sbom/github.go @@ -0,0 +1,169 @@ +// Package sbom provides functionalities to interact with GitHub API for SBOM download. +package sbom + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "time" + + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// GitHubClient handles interactions with the GitHub API. +type GitHubClient struct { + token string + httpClient *http.Client +} + +// NewGitHubClient creates a new GitHubClient with the provided token. +func NewGitHubClient(token string) *GitHubClient { + return &GitHubClient{ + token: token, + httpClient: &http.Client{ + Timeout: 10 * time.Minute, + }, + } +} + +// DownloadSBOM downloads the SBOM from the specified GitHub repository. +func (g *GitHubClient) DownloadSBOM(ctx context.Context, repo, outputFile string) error { + logger.Info("Downloading SBOM from %s", repo) + + url := fmt.Sprintf("https://api.github.com/repos/%s/dependency-graph/sbom", repo) + + maxAttempts := 3 + baseDelay := 30 * time.Second + + for attempt := 1; attempt <= maxAttempts; attempt++ { + logger.Info("Starting SBOM download, attempt %d/%d", attempt, maxAttempts) + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Accept", "application/vnd.github+json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", g.token)) + req.Header.Set("X-GitHub-Api-Version", "2022-11-28") + + resp, err := g.httpClient.Do(req) + if err != nil { + if attempt < maxAttempts { + delay := baseDelay * time.Duration(attempt) + logger.Warning("Request failed, waiting %v before retry: %v", delay, err) + time.Sleep(delay) + continue + } + return fmt.Errorf("failed to download SBOM after %d attempts: %w", maxAttempts, err) + } + + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + + // Check for retryable errors + var errResp struct { + Message string `json:"message"` + } + if json.Unmarshal(body, &errResp) == nil { + if isRetryableError(errResp.Message) && attempt < maxAttempts { + delay := baseDelay * time.Duration(attempt) + logger.Warning("GitHub SBOM generation timed out on attempt %d: %s", attempt, errResp.Message) + logger.Info("Waiting %v before retry...", delay) + time.Sleep(delay) + continue + } + } + + return fmt.Errorf("GitHub API error (status %d): %s", resp.StatusCode, string(body)) + } + + // Create output 
file + outFile, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer func() { + if err := outFile.Close(); err != nil { + logger.Warning("Failed to close file: %v", err) + } + }() + + // Copy response to file + written, err := io.Copy(outFile, resp.Body) + if err != nil { + return fmt.Errorf("failed to write SBOM to file: %w", err) + } + + logger.Success("SBOM downloaded successfully (%d bytes) on attempt %d", written, attempt) + + // Validate JSON + if err := validateJSON(outputFile); err != nil { + if attempt < maxAttempts { + delay := baseDelay * time.Duration(attempt) + logger.Warning("Downloaded file is not valid JSON on attempt %d", attempt) + time.Sleep(delay) + continue + } + return fmt.Errorf("downloaded file is not valid JSON after all attempts: %w", err) + } + + return nil + } + + return fmt.Errorf("failed to download SBOM after %d attempts", maxAttempts) +} + +func isRetryableError(message string) bool { + retryableMessages := []string{ + "Request timed out", + "Failed to generate SBOM", + "timeout", + } + + for _, msg := range retryableMessages { + if contains(message, msg) { + return true + } + } + return false +} + +func validateJSON(filename string) error { + data, err := os.ReadFile(filename) + if err != nil { + return err + } + + var js json.RawMessage + if err := json.Unmarshal(data, &js); err != nil { + return fmt.Errorf("invalid JSON: %w", err) + } + + return nil +} + +func contains(s, substr string) bool { + return len(s) >= len(substr) && + (s == substr || len(s) > len(substr) && + (s[:len(substr)] == substr || s[len(s)-len(substr):] == substr || + containsSubstring(s, substr))) +} + +func containsSubstring(s, substr string) bool { + for i := 0; i <= len(s)-len(substr); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +} diff --git a/internal/sbom/mend.go b/internal/sbom/mend.go new file mode 100644 index 0000000..946dafd --- /dev/null +++ b/internal/sbom/mend.go @@ -0,0 +1,382 @@ +// Package sbom provides functionalities to interact with Mend API 3.0 for SBOM export. +package sbom + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "strings" + "time" + + "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// MendClient handles interactions with the Mend API 3.0. +type MendClient struct { + email string + orgUUID string + userKey string + baseURL string + projectUUID string + productUUID string + orgScopeUUID string + projectUUIDs string + maxWaitTime int + pollInterval int + httpClient *http.Client + jwtToken string +} + +// NewMendClient creates a new MendClient with the provided configuration. 
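+// Authentication happens lazily when an export is requested; all API calls share one HTTP client with a 10 minute timeout.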
+func NewMendClient(cfg *config.Config) *MendClient { + return &MendClient{ + email: cfg.MendEmail, + orgUUID: cfg.MendOrgUUID, + userKey: cfg.MendUserKey, + baseURL: cfg.MendBaseURL, + projectUUID: cfg.MendProjectUUID, + productUUID: cfg.MendProductUUID, + orgScopeUUID: cfg.MendOrgScopeUUID, + projectUUIDs: cfg.MendProjectUUIDs, + maxWaitTime: cfg.MendMaxWaitTime, + pollInterval: cfg.MendPollInterval, + httpClient: &http.Client{ + Timeout: 10 * time.Minute, + }, + } +} + +func (m *MendClient) authenticate(ctx context.Context) error { + logger.Info("Authenticating with Mend API 3.0") + + loginPayload := map[string]string{ + "email": m.email, + "orgUuid": m.orgUUID, + "userKey": m.userKey, + } + + payloadBytes, err := json.Marshal(loginPayload) + if err != nil { + return fmt.Errorf("failed to marshal login payload: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", + m.baseURL+"/api/v3.0/login", + bytes.NewReader(payloadBytes)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + resp, err := m.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to authenticate: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("authentication failed (status %d): %s", resp.StatusCode, string(body)) + } + + var loginResp struct { + Response struct { + RefreshToken string `json:"refreshToken"` + JWTToken string `json:"jwtToken"` + } `json:"response"` + } + + if err := json.NewDecoder(resp.Body).Decode(&loginResp); err != nil { + return fmt.Errorf("failed to parse login response: %w", err) + } + + // Try to get JWT directly from login response + if loginResp.Response.JWTToken != "" { + m.jwtToken = loginResp.Response.JWTToken + logger.Success("Mend authentication successful") + return nil + } + + // Otherwise use refresh token to get JWT + if loginResp.Response.RefreshToken == "" { + return fmt.Errorf("no refresh token or JWT token in response") + } + + // Get JWT token using refresh token + req, err = http.NewRequestWithContext(ctx, "POST", + m.baseURL+"/api/v3.0/login/accessToken", + nil) + if err != nil { + return fmt.Errorf("failed to create JWT request: %w", err) + } + + req.Header.Set("wss-refresh-token", loginResp.Response.RefreshToken) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + resp, err = m.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to get JWT token: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + var jwtResp struct { + Response struct { + JWTToken string `json:"jwtToken"` + } `json:"response"` + } + + if err := json.NewDecoder(resp.Body).Decode(&jwtResp); err != nil { + return fmt.Errorf("failed to parse JWT response: %w", err) + } + + if jwtResp.Response.JWTToken == "" { + return fmt.Errorf("no JWT token in response") + } + + m.jwtToken = jwtResp.Response.JWTToken + logger.Success("Mend authentication successful") + return nil +} + +// RequestSBOMExport requests an SBOM export and downloads it when ready. 
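+// It logs in, submits a CycloneDX 1.5 JSON export for the configured project, product, or organization scope, then polls until the report is ready.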
+func (m *MendClient) RequestSBOMExport(ctx context.Context, outputFile string) error { + logger.Info("Requesting SBOM export from Mend API 3.0") + + // Authenticate first + if err := m.authenticate(ctx); err != nil { + return fmt.Errorf("authentication failed: %w", err) + } + + // Build request payload + payload := map[string]interface{}{ + "name": "clickbom-export", + "reportType": "cycloneDX_1_5", + "format": "json", + "includeVulnerabilities": false, + } + + // Add scope + switch { + case m.projectUUID != "": + payload["scopeType"] = "project" + payload["scopeUuid"] = m.projectUUID + uuids := strings.Split(m.projectUUIDs, ",") + payload["projectUuids"] = uuids + case m.productUUID != "": + payload["scopeType"] = "product" + payload["scopeUuid"] = m.productUUID + uuids := strings.Split(m.projectUUIDs, ",") + payload["projectUuids"] = uuids + case m.orgScopeUUID != "": + payload["scopeType"] = "organization" + payload["scopeUuid"] = m.orgScopeUUID + } + + payloadBytes, err := json.Marshal(payload) + if err != nil { + return fmt.Errorf("failed to marshal payload: %w", err) + } + + url := fmt.Sprintf("%s/api/v3.0/projects/%s/dependencies/reports/SBOM", + m.baseURL, m.projectUUID) + + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payloadBytes)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+m.jwtToken) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + resp, err := m.httpClient.Do(req) + if err != nil { + return fmt.Errorf("failed to request SBOM export: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("export request failed (status %d): %s", resp.StatusCode, string(body)) + } + + var exportResp struct { + Response struct { + UUID string `json:"uuid"` + } `json:"response"` + } + + if err := json.NewDecoder(resp.Body).Decode(&exportResp); err != nil { + return fmt.Errorf("failed to parse export response: %w", err) + } + + if exportResp.Response.UUID == "" { + return fmt.Errorf("no report UUID in response") + } + + logger.Info("Report UUID: %s", exportResp.Response.UUID) + + // Wait for report and download + return m.downloadWhenReady(ctx, exportResp.Response.UUID, outputFile) +} + +func (m *MendClient) downloadWhenReady(ctx context.Context, reportUUID, outputFile string) error { + logger.Info("Waiting for SBOM report to be ready (UUID: %s)", reportUUID) + logger.Info("Max wait time: %ds, Poll interval: %ds", m.maxWaitTime, m.pollInterval) + + startTime := time.Now() + ticker := time.NewTicker(time.Duration(m.pollInterval) * time.Second) + defer ticker.Stop() + + timeout := time.After(time.Duration(m.maxWaitTime) * time.Second) + + for { + select { + case <-ctx.Done(): + return ctx.Err() + + case <-timeout: + return fmt.Errorf("timeout waiting for SBOM report after %ds", m.maxWaitTime) + + case <-ticker.C: + elapsed := int(time.Since(startTime).Seconds()) + logger.Info("Checking report status... 
(elapsed: %ds)", elapsed) + + // Refresh token if needed (every 25 minutes) + if elapsed > 0 && elapsed%1500 == 0 { + logger.Info("Refreshing JWT token") + if err := m.authenticate(ctx); err != nil { + logger.Warning("Failed to refresh token: %v", err) + } + } + + status, err := m.checkReportStatus(ctx, reportUUID) + if err != nil { + logger.Warning("Failed to check status: %v", err) + continue + } + + logger.Info("Report status: %s", status) + + switch status { + case "COMPLETED", "SUCCESS": + logger.Success("Report is ready for download") + return m.downloadReport(ctx, reportUUID, outputFile) + + case "FAILED", "CANCELED": + return fmt.Errorf("report generation failed with status: %s", status) + + case "PENDING", "IN_PROGRESS": + continue + + default: + logger.Warning("Unknown report status: %s", status) + continue + } + } + } +} + +func (m *MendClient) checkReportStatus(ctx context.Context, reportUUID string) (string, error) { + url := fmt.Sprintf("%s/api/v3.0/orgs/%s/reports/%s", m.baseURL, m.orgUUID, reportUUID) + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return "", err + } + + req.Header.Set("Authorization", "Bearer "+m.jwtToken) + req.Header.Set("Accept", "application/json") + + resp, err := m.httpClient.Do(req) + if err != nil { + return "", err + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + var statusResp struct { + Response struct { + Status string `json:"status"` + } `json:"response"` + } + + if err := json.NewDecoder(resp.Body).Decode(&statusResp); err != nil { + return "", err + } + + return statusResp.Response.Status, nil +} + +func (m *MendClient) downloadReport(ctx context.Context, reportUUID, outputFile string) error { + logger.Info("Downloading SBOM report (UUID: %s)", reportUUID) + + url := fmt.Sprintf("%s/api/v3.0/orgs/%s/reports/download/%s", + m.baseURL, m.orgUUID, reportUUID) + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return err + } + + req.Header.Set("Authorization", "Bearer "+m.jwtToken) + req.Header.Set("Accept", "application/json") + + resp, err := m.httpClient.Do(req) + if err != nil { + return fmt.Errorf("download failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("download failed (status %d): %s", resp.StatusCode, string(body)) + } + + // Create output file + outFile, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer func() { + if err := outFile.Close(); err != nil { + logger.Warning("Failed to close file: %v", err) + } + }() + + // Copy response to file + written, err := io.Copy(outFile, resp.Body) + if err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + + logger.Success("Mend SBOM downloaded successfully (%d bytes)", written) + return nil +} diff --git a/internal/sbom/processing.go b/internal/sbom/processing.go new file mode 100644 index 0000000..a31b39b --- /dev/null +++ b/internal/sbom/processing.go @@ -0,0 +1,137 @@ +// Package sbom provides functionalities to interact with Software Bill of Materials (SBOM). +package sbom + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// Format represents the format of a Software Bill of Materials (SBOM). 
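+// Recognized values are FormatCycloneDX and FormatSPDXJSON; FormatUnknown is reported when detection fails.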
+type Format string + +const ( + // FormatCycloneDX represents the CycloneDX SBOM format. + FormatCycloneDX Format = "cyclonedx" + // FormatSPDXJSON represents the SPDX JSON SBOM format. + FormatSPDXJSON Format = "spdxjson" + // FormatUnknown represents an unknown SBOM format. + FormatUnknown Format = "unknown" +) + +// CycloneDXDocument represents the basic structure of a CycloneDX SBOM. +type CycloneDXDocument struct { + BOMFormat string `json:"bomFormat"` + SpecVersion string `json:"specVersion"` +} + +// SPDXDocument represents the basic structure of an SPDX SBOM. +type SPDXDocument struct { + SPDXVersion string `json:"spdxVersion"` + SPDXID string `json:"SPDXID"` +} + +// DetectSBOMFormat detects the format of the SBOM file based on its content. +func DetectSBOMFormat(filename string) (Format, error) { + logger.Debug("Detecting SBOM format for: %s", filename) + + data, err := os.ReadFile(filename) + if err != nil { + return FormatUnknown, fmt.Errorf("failed to read file: %w", err) + } + + // Try CycloneDX + var cdx CycloneDXDocument + if err := json.Unmarshal(data, &cdx); err == nil { + if cdx.BOMFormat == "CycloneDX" { + logger.Debug("Detected format: CycloneDX") + return FormatCycloneDX, nil + } + } + + // Try SPDX + var spdx SPDXDocument + if err := json.Unmarshal(data, &spdx); err == nil { + if spdx.SPDXVersion != "" { + logger.Debug("Detected format: SPDX") + return FormatSPDXJSON, nil + } + } + + logger.Warning("Unknown SBOM format") + return FormatUnknown, nil +} + +// ExtractSBOMFromWrapper extracts the SBOM from a wrapper format (e.g., GitHub) if necessary. +func ExtractSBOMFromWrapper(inputFile, outputFile string) error { + logger.Debug("Checking if SBOM is wrapped") + + data, err := os.ReadFile(inputFile) + if err != nil { + return fmt.Errorf("failed to read input file: %w", err) + } + + var wrapper map[string]interface{} + if err := json.Unmarshal(data, &wrapper); err != nil { + return fmt.Errorf("failed to parse JSON: %w", err) + } + + // Check if there's an 'sbom' field (GitHub wrapper) + if sbomData, ok := wrapper["sbom"]; ok { + logger.Info("Found wrapped SBOM, extracting...") + + sbomJSON, err := json.MarshalIndent(sbomData, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal SBOM: %w", err) + } + + if err := os.WriteFile(outputFile, sbomJSON, 0644); err != nil { + return fmt.Errorf("failed to write output file: %w", err) + } + + logger.Success("SBOM extracted from wrapper") + return nil + } + + // Not wrapped, just copy + logger.Debug("SBOM is not wrapped") + if err := os.WriteFile(outputFile, data, 0644); err != nil { + return fmt.Errorf("failed to write output file: %w", err) + } + + return nil +} + +// ConvertSBOM converts the SBOM from one format to another. 
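+// Identical source and target formats result in a plain copy; otherwise the conversion is delegated to the cyclonedx CLI.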
+func ConvertSBOM(inputFile, outputFile string, sourceFormat, targetFormat Format) error { + if sourceFormat == targetFormat { + logger.Info("Source and target formats are the same, copying file") + data, err := os.ReadFile(inputFile) + if err != nil { + return err + } + return os.WriteFile(outputFile, data, 0644) + } + + logger.Info("Converting SBOM from %s to %s", sourceFormat, targetFormat) + + // Use cyclonedx-cli for conversion + cmd := exec.Command("cyclonedx", + "convert", + "--input-file", inputFile, + "--output-file", outputFile, + "--input-format", string(sourceFormat), + "--output-format", string(targetFormat), + ) + + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("conversion failed: %w\nOutput: %s", err, string(output)) + } + + logger.Success("SBOM converted successfully") + return nil +} diff --git a/internal/sbom/processing_integration_test.go b/internal/sbom/processing_integration_test.go new file mode 100644 index 0000000..60cb2f6 --- /dev/null +++ b/internal/sbom/processing_integration_test.go @@ -0,0 +1,117 @@ +//go:build integration + +package storage + +import ( + "os" + "path/filepath" + "testing" + + "github.com/ClickHouse/ClickBOM/internal/sbom" +) + +func TestSBOMProcessing(t *testing.T) { + tempDir := t.TempDir() + + t.Run("Extract SBOM from GitHub wrapper", func(t *testing.T) { + wrappedSBOM := filepath.Join(tempDir, "wrapped.json") + wrappedContent := `{ + "sbom": { + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "components": [] + } + }` + + if err := os.WriteFile(wrappedSBOM, []byte(wrappedContent), 0644); err != nil { + t.Fatalf("Failed to create wrapped SBOM: %v", err) + } + + extractedSBOM := filepath.Join(tempDir, "extracted.json") + if err := sbom.ExtractSBOMFromWrapper(wrappedSBOM, extractedSBOM); err != nil { + t.Fatalf("Failed to extract: %v", err) + } + + // Verify extracted file + data, err := os.ReadFile(extractedSBOM) + if err != nil { + t.Fatalf("Failed to read extracted file: %v", err) + } + + if len(data) == 0 { + t.Error("Extracted file is empty") + } + + t.Log("โœ“ Successfully extracted SBOM from wrapper") + }) + + t.Run("Detect CycloneDX format", func(t *testing.T) { + cdxSBOM := filepath.Join(tempDir, "cyclonedx.json") + cdxContent := `{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "components": [] + }` + + if err := os.WriteFile(cdxSBOM, []byte(cdxContent), 0644); err != nil { + t.Fatalf("Failed to create CycloneDX SBOM: %v", err) + } + + format, err := sbom.DetectSBOMFormat(cdxSBOM) + if err != nil { + t.Fatalf("Failed to detect format: %v", err) + } + + if format != sbom.FormatCycloneDX { + t.Errorf("Expected CycloneDX, got %s", format) + } + + t.Log("โœ“ Correctly detected CycloneDX format") + }) + + t.Run("Detect SPDX format", func(t *testing.T) { + spdxSBOM := filepath.Join(tempDir, "spdx.json") + spdxContent := `{ + "spdxVersion": "SPDX-2.3", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "test" + }` + + if err := os.WriteFile(spdxSBOM, []byte(spdxContent), 0644); err != nil { + t.Fatalf("Failed to create SPDX SBOM: %v", err) + } + + format, err := sbom.DetectSBOMFormat(spdxSBOM) + if err != nil { + t.Fatalf("Failed to detect format: %v", err) + } + + if format != sbom.FormatSPDXJSON { + t.Errorf("Expected SPDX, got %s", format) + } + + t.Log("โœ“ Correctly detected SPDX format") + }) + + t.Run("Convert same format (copy)", func(t *testing.T) { + inputSBOM := filepath.Join(tempDir, "input.json") + outputSBOM := filepath.Join(tempDir, "output.json") + + content := 
`{"bomFormat":"CycloneDX","specVersion":"1.6"}` + if err := os.WriteFile(inputSBOM, []byte(content), 0644); err != nil { + t.Fatalf("Failed to create input SBOM: %v", err) + } + + err := sbom.ConvertSBOM(inputSBOM, outputSBOM, sbom.FormatCycloneDX, sbom.FormatCycloneDX) + if err != nil { + t.Fatalf("Failed to convert: %v", err) + } + + // Verify output exists + if _, err := os.Stat(outputSBOM); os.IsNotExist(err) { + t.Error("Output file was not created") + } + + t.Log("โœ“ Same format conversion (copy) successful") + }) +} diff --git a/internal/sbom/processing_test.go b/internal/sbom/processing_test.go new file mode 100644 index 0000000..f9158ed --- /dev/null +++ b/internal/sbom/processing_test.go @@ -0,0 +1,32 @@ +package sbom + +import ( + "os" + "testing" +) + +func BenchmarkDetectSBOMFormat(b *testing.B) { + // Create test SBOM file + testFile := "/tmp/bench-sbom.json" + testContent := []byte(`{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "components": [] + }`) + if err := os.WriteFile(testFile, testContent, 0644); err != nil { + b.Fatalf("Failed to create test file: %v", err) + } + defer func() { + if err := os.Remove(testFile); err != nil { + b.Fatalf("Failed to remove test file: %v", err) + } + }() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := DetectSBOMFormat(testFile) + if err != nil { + b.Fatalf("DetectSBOMFormat failed: %v", err) + } + } +} diff --git a/internal/sbom/wiz.go b/internal/sbom/wiz.go new file mode 100644 index 0000000..b8a7d6b --- /dev/null +++ b/internal/sbom/wiz.go @@ -0,0 +1,219 @@ +// Package sbom provides functionalities to interact with Software Bill of Materials (SBOM). +package sbom + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "time" + + "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// WizClient handles interactions with the Wiz API. +type WizClient struct { + authEndpoint string + apiEndpoint string + clientID string + clientSecret string + reportID string + httpClient *http.Client + accessToken string +} + +// NewWizClient creates a new WizClient with the provided configuration. 
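+// The client obtains an OAuth access token on demand when DownloadReport is called.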
+func NewWizClient(cfg *config.Config) *WizClient { + return &WizClient{ + authEndpoint: cfg.WizAuthEndpoint, + apiEndpoint: cfg.WizAPIEndpoint, + clientID: cfg.WizClientID, + clientSecret: cfg.WizClientSecret, + reportID: cfg.WizReportID, + httpClient: &http.Client{ + Timeout: 10 * time.Minute, + }, + } +} + +func (w *WizClient) authenticate(ctx context.Context) error { + logger.Info("Authenticating with Wiz API") + + data := map[string]string{ + "grant_type": "client_credentials", + "client_id": w.clientID, + "client_secret": w.clientSecret, + "audience": "wiz-api", + } + + jsonData, err := json.Marshal(data) + if err != nil { + return fmt.Errorf("failed to marshal auth data: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", w.authEndpoint, bytes.NewReader(jsonData)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + resp, err := w.httpClient.Do(req) + if err != nil { + return fmt.Errorf("authentication request failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("authentication failed (status %d): %s", resp.StatusCode, string(body)) + } + + var authResp struct { + AccessToken string `json:"access_token"` + } + + if err := json.NewDecoder(resp.Body).Decode(&authResp); err != nil { + return fmt.Errorf("failed to parse auth response: %w", err) + } + + if authResp.AccessToken == "" { + return fmt.Errorf("no access token in response") + } + + w.accessToken = authResp.AccessToken + logger.Success("Wiz authentication successful") + return nil +} + +// DownloadReport downloads the Wiz report and saves it to the specified output file. +func (w *WizClient) DownloadReport(ctx context.Context, outputFile string) error { + logger.Info("Downloading Wiz report: %s", w.reportID) + + // Authenticate first + if err := w.authenticate(ctx); err != nil { + return fmt.Errorf("authentication failed: %w", err) + } + + // GraphQL query to get download URL + query := fmt.Sprintf(`{ + "query": "query ReportDownloadUrl($reportId: ID!) 
{ report(id: $reportId) { lastRun { url } } }", + "variables": { + "reportId": "%s" + } + }`, w.reportID) + + req, err := http.NewRequestWithContext(ctx, "POST", + w.apiEndpoint+"/api/graphql", + bytes.NewReader([]byte(query))) + if err != nil { + return fmt.Errorf("failed to create GraphQL request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+w.accessToken) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + + resp, err := w.httpClient.Do(req) + if err != nil { + return fmt.Errorf("GraphQL request failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + var graphqlResp struct { + Data struct { + Report struct { + LastRun struct { + URL string `json:"url"` + } `json:"lastRun"` + } `json:"report"` + } `json:"data"` + Errors []struct { + Message string `json:"message"` + } `json:"errors"` + } + + if err := json.NewDecoder(resp.Body).Decode(&graphqlResp); err != nil { + return fmt.Errorf("failed to parse GraphQL response: %w", err) + } + + if len(graphqlResp.Errors) > 0 { + return fmt.Errorf("a Wiz GraphQL error has occurred: %s", graphqlResp.Errors[0].Message) + } + + downloadURL := graphqlResp.Data.Report.LastRun.URL + if downloadURL == "" { + return fmt.Errorf("no download URL found in response") + } + + logger.Info("Got download URL from Wiz") + + // Download the report + return w.downloadFromURL(ctx, downloadURL, outputFile) +} + +func (w *WizClient) downloadFromURL(ctx context.Context, url, outputFile string) error { + logger.Info("Downloading Wiz report from URL") + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("failed to create download request: %w", err) + } + + req.Header.Set("Authorization", "Bearer "+w.accessToken) + req.Header.Set("Accept", "application/json") + + resp, err := w.httpClient.Do(req) + if err != nil { + return fmt.Errorf("download failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("download failed (status %d): %s", resp.StatusCode, string(body)) + } + + // Create output file + outFile, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer func() { + if err := outFile.Close(); err != nil { + logger.Warning("Failed to close file: %v", err) + } + }() + + // Copy response to file + written, err := io.Copy(outFile, resp.Body) + if err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + + logger.Success("Wiz report downloaded successfully (%d bytes)", written) + + // Validate JSON + if err := validateJSON(outputFile); err != nil { + return fmt.Errorf("downloaded file is not valid JSON: %w", err) + } + + return nil +} diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go new file mode 100644 index 0000000..c7a68d1 --- /dev/null +++ b/internal/storage/clickhouse.go @@ -0,0 +1,325 @@ +// Package storage provides functionalities to interact with storage backends like ClickHouse. 
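+// The ClickHouse client uses the HTTP interface for queries, schema setup, and TSV inserts.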
+package storage + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// ClickHouseClient handles interactions with ClickHouse database. +type ClickHouseClient struct { + url string + database string + username string + password string + truncate bool + httpClient *http.Client +} + +// NewClickHouseClient creates a new ClickHouseClient with the provided configuration. +func NewClickHouseClient(cfg *config.Config) (*ClickHouseClient, error) { + return &ClickHouseClient{ + url: cfg.ClickHouseURL, + database: cfg.ClickHouseDatabase, + username: cfg.ClickHouseUsername, + password: cfg.ClickHousePassword, + truncate: cfg.TruncateTable, + httpClient: &http.Client{ + Timeout: 5 * time.Minute, + }, + }, nil +} + +func (c *ClickHouseClient) executeQuery(ctx context.Context, query string) error { + req, err := http.NewRequestWithContext(ctx, "POST", c.url, strings.NewReader(query)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + if c.username != "" { + req.SetBasicAuth(c.username, c.password) + } + + req.Header.Set("Content-Type", "text/plain") + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("request failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("query failed (status %d): %s", resp.StatusCode, string(body)) + } + + return nil +} + +func (c *ClickHouseClient) queryScalar(ctx context.Context, query string) (string, error) { + req, err := http.NewRequestWithContext(ctx, "POST", c.url, strings.NewReader(query)) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + if c.username != "" { + req.SetBasicAuth(c.username, c.password) + } + + resp, err := c.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("request failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("query failed (status %d): %s", resp.StatusCode, string(body)) + } + + result, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(result)), nil +} + +// SetupTable prepares the ClickHouse table for data insertion. 
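+// It verifies connectivity, creates the table when it does not exist, migrates the source column when missing, and truncates existing rows when configured.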
+func (c *ClickHouseClient) SetupTable(ctx context.Context, tableName string) error { + logger.Info("Setting up ClickHouse table: %s", tableName) + + // Test connection + logger.Debug("Testing ClickHouse connection...") + if err := c.executeQuery(ctx, "SELECT 1"); err != nil { + logger.Error("ClickHouse connection test failed") + return fmt.Errorf("connection test failed: %w", err) + } + logger.Success("ClickHouse connection successful") + + // Check if table exists + checkQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM system.tables WHERE database='%s' AND name='%s'", + c.database, tableName) + + result, err := c.queryScalar(ctx, checkQuery) + if err != nil { + return fmt.Errorf("failed to check table existence: %w", err) + } + + if result == "1" { + logger.Info("Table %s already exists", tableName) + + // Check and migrate if needed + if err := c.checkAndMigrateTable(ctx, tableName); err != nil { + return fmt.Errorf("table migration failed: %w", err) + } + + if c.truncate { + logger.Info("Truncating existing table: %s", tableName) + truncateQuery := fmt.Sprintf("TRUNCATE TABLE %s.%s", c.database, tableName) + if err := c.executeQuery(ctx, truncateQuery); err != nil { + return fmt.Errorf("failed to truncate table: %w", err) + } + logger.Success("Table %s truncated", tableName) + } else { + logger.Info("New data will be appended to existing table: %s", tableName) + } + } else { + logger.Info("Creating new table: %s", tableName) + createQuery := fmt.Sprintf(` + CREATE TABLE %s.%s ( + name String, + version String, + license String, + source LowCardinality(String), + inserted_at DateTime DEFAULT now() + ) ENGINE = MergeTree() + ORDER BY (name, version, license) + `, c.database, tableName) + + if err := c.executeQuery(ctx, createQuery); err != nil { + return fmt.Errorf("failed to create table: %w", err) + } + logger.Success("Table %s created successfully", tableName) + } + + return nil +} + +func (c *ClickHouseClient) checkAndMigrateTable(ctx context.Context, tableName string) error { + logger.Info("Checking if table %s needs migration for source column", tableName) + + // Check if source column exists + checkQuery := fmt.Sprintf( + "SELECT COUNT(*) FROM system.columns WHERE database='%s' AND table='%s' AND name='source'", + c.database, tableName) + + result, err := c.queryScalar(ctx, checkQuery) + if err != nil { + return fmt.Errorf("failed to check column existence: %w", err) + } + + if result == "0" { + logger.Info("source column not found, migrating table: %s", tableName) + + alterQuery := fmt.Sprintf( + "ALTER TABLE %s.%s ADD COLUMN source LowCardinality(String) DEFAULT 'unknown'", + c.database, tableName) + + if err := c.executeQuery(ctx, alterQuery); err != nil { + return fmt.Errorf("failed to add source column: %w", err) + } + + logger.Success("source column added to table %s", tableName) + } else { + logger.Info("source column already exists in table %s", tableName) + } + + return nil +} + +// InsertSBOMData extracts components from the SBOM and inserts them into the ClickHouse table. 
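+// Component name, version, license, and source are sent as tab-separated values in a single HTTP INSERT.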
+func (c *ClickHouseClient) InsertSBOMData(ctx context.Context, sbomFile, tableName, sbomFormat string) error { + logger.Info("Extracting components from %s SBOM for ClickHouse", sbomFormat) + + // Read SBOM file + data, err := os.ReadFile(sbomFile) + if err != nil { + return fmt.Errorf("failed to read SBOM file: %w", err) + } + + var components []map[string]interface{} + + // Parse based on format + switch sbomFormat { + case "cyclonedx": + var cdx struct { + Components []map[string]interface{} `json:"components"` + } + if err := json.Unmarshal(data, &cdx); err != nil { + return fmt.Errorf("failed to parse CycloneDX: %w", err) + } + components = cdx.Components + + case "spdxjson": + var spdx struct { + Packages []map[string]interface{} `json:"packages"` + } + if err := json.Unmarshal(data, &spdx); err != nil { + return fmt.Errorf("failed to parse SPDX: %w", err) + } + components = spdx.Packages + + default: + return fmt.Errorf("unsupported SBOM format: %s", sbomFormat) + } + + if len(components) == 0 { + logger.Warning("No components found in SBOM") + return nil + } + + logger.Info("Found %d components to insert", len(components)) + + // Build TSV data + var tsvData bytes.Buffer + for _, comp := range components { + name := getStringField(comp, "name", "unknown") + version := getStringField(comp, "version", "unknown") + license := extractLicense(comp) + source := getStringField(comp, "source", "unknown") + + fmt.Fprintf(&tsvData, "%s\t%s\t%s\t%s\n", name, version, license, source) + } + + // Insert data + insertURL := fmt.Sprintf("%s/?query=%s", + c.url, + url.QueryEscape(fmt.Sprintf( + "INSERT INTO %s.%s (name, version, license, source) FORMAT TSV", + c.database, tableName))) + + req, err := http.NewRequestWithContext(ctx, "POST", insertURL, &tsvData) + if err != nil { + return fmt.Errorf("failed to create insert request: %w", err) + } + + if c.username != "" { + req.SetBasicAuth(c.username, c.password) + } + + req.Header.Set("Content-Type", "text/tab-separated-values") + + resp, err := c.httpClient.Do(req) + if err != nil { + return fmt.Errorf("insert request failed: %w", err) + } + defer func() { + if err := resp.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return fmt.Errorf("insert failed (status %d): %s", resp.StatusCode, string(body)) + } + + logger.Success("Inserted %d components into ClickHouse table %s", len(components), tableName) + return nil +} + +func getStringField(m map[string]interface{}, key, defaultVal string) string { + if val, ok := m[key]; ok { + if str, ok := val.(string); ok { + return str + } + } + return defaultVal +} + +func extractLicense(comp map[string]interface{}) string { + // Try CycloneDX licenses array + if licenses, ok := comp["licenses"].([]interface{}); ok && len(licenses) > 0 { + if lic, ok := licenses[0].(map[string]interface{}); ok { + if license, ok := lic["license"].(map[string]interface{}); ok { + if id, ok := license["id"].(string); ok && id != "" { + return id + } + if name, ok := license["name"].(string); ok && name != "" { + return name + } + } + } + } + + // Try SPDX fields + if concluded, ok := comp["licenseConcluded"].(string); ok && concluded != "" { + return concluded + } + if declared, ok := comp["licenseDeclared"].(string); ok && declared != "" { + return declared + } + + return "unknown" +} diff --git a/internal/storage/clickhouse_integration_test.go b/internal/storage/clickhouse_integration_test.go new 
file mode 100644 index 0000000..8b61355 --- /dev/null +++ b/internal/storage/clickhouse_integration_test.go @@ -0,0 +1,104 @@ +//go:build integration + +package storage + +import ( + "context" + "os" + "testing" + + "github.com/ClickHouse/ClickBOM/internal/config" +) + +func TestClickHouseIntegration(t *testing.T) { + if os.Getenv("CLICKHOUSE_URL") == "" { + t.Skip("Skipping integration test - CLICKHOUSE_URL not set") + } + + ctx := context.Background() + + // Create ClickHouse client + cfg := &config.Config{ + ClickHouseURL: os.Getenv("CLICKHOUSE_URL"), + ClickHouseDatabase: "default", + ClickHouseUsername: "default", + ClickHousePassword: "", + TruncateTable: true, + } + + chClient, err := storage.NewClickHouseClient(cfg) + if err != nil { + t.Fatalf("Failed to create ClickHouse client: %v", err) + } + + tableName := "test_sbom_components" + + t.Run("Setup ClickHouse table", func(t *testing.T) { + err := chClient.SetupTable(ctx, tableName) + if err != nil { + t.Fatalf("Failed to setup table: %v", err) + } + + t.Logf("โœ“ Successfully set up table: %s", tableName) + }) + + t.Run("Insert SBOM data", func(t *testing.T) { + // Create test SBOM file + testSBOM := `/tmp/test-clickhouse-sbom.json` + testContent := `{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "components": [ + { + "name": "lodash", + "version": "4.17.21", + "type": "library", + "licenses": [ + { + "license": { + "id": "MIT" + } + } + ], + "source": "github" + }, + { + "name": "react", + "version": "18.2.0", + "type": "library", + "licenses": [ + { + "license": { + "id": "MIT" + } + } + ], + "source": "github" + } + ] + }` + + if err := os.WriteFile(testSBOM, []byte(testContent), 0644); err != nil { + t.Fatalf("Failed to create test SBOM: %v", err) + } + defer os.Remove(testSBOM) + + // Insert data + err := chClient.InsertSBOMData(ctx, testSBOM, tableName, "cyclonedx") + if err != nil { + t.Fatalf("Failed to insert data: %v", err) + } + + t.Log("โœ“ Successfully inserted SBOM data into ClickHouse") + }) + + t.Run("Verify table migration", func(t *testing.T) { + // This tests the source column migration + err := chClient.SetupTable(ctx, tableName) + if err != nil { + t.Fatalf("Failed during table setup/migration: %v", err) + } + + t.Log("โœ“ Table migration check passed") + }) +} diff --git a/internal/storage/e2e_test.go b/internal/storage/e2e_test.go new file mode 100644 index 0000000..5a094bc --- /dev/null +++ b/internal/storage/e2e_test.go @@ -0,0 +1,141 @@ +//go:build integration + +package storage + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/internal/sbom" +) + +func TestEndToEndWorkflow(t *testing.T) { + if os.Getenv("AWS_ENDPOINT_URL") == "" || os.Getenv("CLICKHOUSE_URL") == "" { + t.Skip("Skipping E2E test - AWS_ENDPOINT_URL or CLICKHOUSE_URL not set") + } + + ctx := context.Background() + + // Create temp directory for test files + tempDir := t.TempDir() + + t.Run("Complete SBOM workflow", func(t *testing.T) { + // Step 1: Create a mock SBOM + originalSBOM := filepath.Join(tempDir, "original.json") + sbomContent := `{ + "sbom": { + "spdxVersion": "SPDX-2.3", + "SPDXID": "SPDXRef-DOCUMENT", + "name": "test-document", + "packages": [ + { + "name": "test-package", + "versionInfo": "1.0.0", + "licenseConcluded": "MIT" + } + ] + } + }` + + if err := os.WriteFile(originalSBOM, []byte(sbomContent), 0644); err != nil { + t.Fatalf("Failed to create test SBOM: %v", err) + } + + t.Log("โœ“ Created test SBOM file") + + 
// Step 2: Extract from wrapper + extractedSBOM := filepath.Join(tempDir, "extracted.json") + if err := sbom.ExtractSBOMFromWrapper(originalSBOM, extractedSBOM); err != nil { + t.Fatalf("Failed to extract SBOM: %v", err) + } + + t.Log("โœ“ Extracted SBOM from wrapper") + + // Step 3: Detect format + format, err := sbom.DetectSBOMFormat(extractedSBOM) + if err != nil { + t.Fatalf("Failed to detect format: %v", err) + } + + if format != sbom.FormatSPDXJSON { + t.Errorf("Expected SPDX format, got %s", format) + } + + t.Logf("โœ“ Detected format: %s", format) + + // Step 4: Convert to CycloneDX + convertedSBOM := filepath.Join(tempDir, "converted.json") + if err := sbom.ConvertSBOM(extractedSBOM, convertedSBOM, format, sbom.FormatCycloneDX); err != nil { + t.Fatalf("Failed to convert SBOM: %v", err) + } + + t.Log("โœ“ Converted SBOM to CycloneDX") + + // Step 5: Upload to S3 + s3Client, err := storage.NewS3Client( + ctx, + os.Getenv("AWS_ACCESS_KEY_ID"), + os.Getenv("AWS_SECRET_ACCESS_KEY"), + os.Getenv("AWS_DEFAULT_REGION"), + ) + if err != nil { + t.Fatalf("Failed to create S3 client: %v", err) + } + + testBucket := "test-bucket" + testKey := "e2e-test.json" + + if err := s3Client.Upload(ctx, convertedSBOM, testBucket, testKey, "cyclonedx"); err != nil { + t.Fatalf("Failed to upload to S3: %v", err) + } + + t.Log("โœ“ Uploaded SBOM to S3") + + // Step 6: Insert into ClickHouse + cfg := &config.Config{ + ClickHouseURL: os.Getenv("CLICKHOUSE_URL"), + ClickHouseDatabase: "default", + ClickHouseUsername: "default", + ClickHousePassword: "", + TruncateTable: true, + } + + chClient, err := storage.NewClickHouseClient(cfg) + if err != nil { + t.Fatalf("Failed to create ClickHouse client: %v", err) + } + + tableName := "e2e_test_sbom" + + if err := chClient.SetupTable(ctx, tableName); err != nil { + t.Fatalf("Failed to setup ClickHouse table: %v", err) + } + + if err := chClient.InsertSBOMData(ctx, convertedSBOM, tableName, "cyclonedx"); err != nil { + t.Fatalf("Failed to insert into ClickHouse: %v", err) + } + + t.Log("โœ“ Inserted data into ClickHouse") + + // Step 7: Download from S3 and verify + downloadedSBOM := filepath.Join(tempDir, "downloaded.json") + if err := s3Client.Download(ctx, testBucket, testKey, downloadedSBOM); err != nil { + t.Fatalf("Failed to download from S3: %v", err) + } + + // Verify downloaded file exists and has content + downloadedData, err := os.ReadFile(downloadedSBOM) + if err != nil { + t.Fatalf("Failed to read downloaded file: %v", err) + } + + if len(downloadedData) == 0 { + t.Error("Downloaded file is empty") + } + + t.Log("โœ“ Downloaded and verified SBOM from S3") + }) +} diff --git a/internal/storage/s3.go b/internal/storage/s3.go new file mode 100644 index 0000000..69cc5d1 --- /dev/null +++ b/internal/storage/s3.go @@ -0,0 +1,136 @@ +// Package storage provides functionalities to interact with storage backends like S3. +package storage + +import ( + "context" + "fmt" + "io" + "os" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" + + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// S3Client handles interactions with Amazon S3. +type S3Client struct { + client *s3.Client +} + +// NewS3Client creates a new S3Client with the provided AWS credentials and region. 
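+// Static credentials are supplied explicitly rather than relying on the default AWS credential chain.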
+func NewS3Client(ctx context.Context, accessKeyID, secretAccessKey, region string) (*S3Client, error) { + cfg, err := config.LoadDefaultConfig(ctx, + config.WithRegion(region), + config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( + accessKeyID, + secretAccessKey, + "", + )), + ) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + return &S3Client{ + client: s3.NewFromConfig(cfg), + }, nil +} + +// Upload uploads a file to the specified S3 bucket and key. +func (s *S3Client) Upload(ctx context.Context, localFile, bucket, key, sbomFormat string) error { + logger.Info("Uploading %s SBOM to s3://%s/%s", sbomFormat, bucket, key) + + file, err := os.Open(localFile) + if err != nil { + return fmt.Errorf("failed to open file: %w", err) + } + defer func() { + if err := file.Close(); err != nil { + logger.Warning("Failed to close file: %v", err) + } + }() + + contentType := "application/json" + + _, err = s.client.PutObject(ctx, &s3.PutObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + Body: file, + ContentType: aws.String(contentType), + Metadata: map[string]string{ + "format": sbomFormat, + "source": "github-action", + }, + }) + + if err != nil { + return fmt.Errorf("failed to upload SBOM to S3: %w", err) + } + + logger.Success("SBOM uploaded successfully to S3") + return nil +} + +// Download downloads a file from the specified S3 bucket and key to a local file. +func (s *S3Client) Download(ctx context.Context, bucket, key, localFile string) error { + logger.Debug("Downloading s3://%s/%s to %s", bucket, key, localFile) + + result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + if err != nil { + return fmt.Errorf("failed to download from S3: %w", err) + } + defer func() { + if err := result.Body.Close(); err != nil { + logger.Warning("Failed to close response body: %v", err) + } + }() + + file, err := os.Create(localFile) + if err != nil { + return fmt.Errorf("failed to create local file: %w", err) + } + defer func() { + if err := file.Close(); err != nil { + logger.Warning("Failed to close file: %v", err) + } + }() + + _, err = io.Copy(file, result.Body) + if err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + + return nil +} + +// ListObjects lists the object keys in the specified S3 bucket with the given prefix. 
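+// Pagination is handled internally, so all matching keys are returned.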
+func (s *S3Client) ListObjects(ctx context.Context, bucket, prefix string) ([]string, error) { + logger.Debug("Listing objects in s3://%s with prefix: %s", bucket, prefix) + + var keys []string + + paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ + Bucket: aws.String(bucket), + Prefix: aws.String(prefix), + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(ctx) + if err != nil { + return nil, fmt.Errorf("failed to list S3 objects: %w", err) + } + + for _, obj := range page.Contents { + keys = append(keys, *obj.Key) + } + } + + logger.Info("Found %d objects in S3", len(keys)) + return keys, nil +} diff --git a/internal/storage/s3_integration_test.go b/internal/storage/s3_integration_test.go new file mode 100644 index 0000000..63d20af --- /dev/null +++ b/internal/storage/s3_integration_test.go @@ -0,0 +1,111 @@ +//go:build integration + +package storage + +import ( + "context" + "os" + "testing" +) + +func TestS3Integration(t *testing.T) { + // Skip if not in integration test mode + if os.Getenv("AWS_ENDPOINT_URL") == "" { + t.Skip("Skipping integration test - AWS_ENDPOINT_URL not set") + } + + ctx := context.Background() + + // Create S3 client + s3Client, err := storage.NewS3Client( + ctx, + os.Getenv("AWS_ACCESS_KEY_ID"), + os.Getenv("AWS_SECRET_ACCESS_KEY"), + os.Getenv("AWS_DEFAULT_REGION"), + ) + if err != nil { + t.Fatalf("Failed to create S3 client: %v", err) + } + + // Test data + testBucket := "test-bucket" + testKey := "test-sbom.json" + testContent := `{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": "urn:uuid:test-123", + "version": 1, + "components": [ + { + "name": "test-component", + "version": "1.0.0", + "type": "library" + } + ] + }` + + t.Run("Upload SBOM to S3", func(t *testing.T) { + // Create test file + testFile := "/tmp/test-sbom.json" + if err := os.WriteFile(testFile, []byte(testContent), 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + defer os.Remove(testFile) + + // Upload + err := s3Client.Upload(ctx, testFile, testBucket, testKey, "cyclonedx") + if err != nil { + t.Fatalf("Failed to upload: %v", err) + } + + t.Log("โœ“ Successfully uploaded SBOM to S3") + }) + + t.Run("Download SBOM from S3", func(t *testing.T) { + downloadFile := "/tmp/downloaded-sbom.json" + defer os.Remove(downloadFile) + + err := s3Client.Download(ctx, testBucket, testKey, downloadFile) + if err != nil { + t.Fatalf("Failed to download: %v", err) + } + + // Verify content + downloaded, err := os.ReadFile(downloadFile) + if err != nil { + t.Fatalf("Failed to read downloaded file: %v", err) + } + + if len(downloaded) == 0 { + t.Error("Downloaded file is empty") + } + + t.Logf("โœ“ Successfully downloaded SBOM from S3 (%d bytes)", len(downloaded)) + }) + + t.Run("List S3 objects", func(t *testing.T) { + keys, err := s3Client.ListObjects(ctx, testBucket, "") + if err != nil { + t.Fatalf("Failed to list objects: %v", err) + } + + if len(keys) == 0 { + t.Error("No objects found in bucket") + } + + // Verify our test file is in the list + found := false + for _, key := range keys { + if key == testKey { + found = true + break + } + } + + if !found { + t.Errorf("Test file %s not found in bucket listing", testKey) + } + + t.Logf("โœ“ Found %d objects in S3 bucket", len(keys)) + }) +} diff --git a/internal/storage/s3_test.go b/internal/storage/s3_test.go new file mode 100644 index 0000000..7ced2bb --- /dev/null +++ b/internal/storage/s3_test.go @@ -0,0 +1,66 @@ +//go:build integration +// +build 
integration + +package storage + +import ( + "context" + "os" + "testing" +) + +func TestS3Upload(t *testing.T) { + // Skip if not in integration test mode + if os.Getenv("AWS_ENDPOINT_URL") == "" { + t.Skip("Skipping integration test - AWS_ENDPOINT_URL not set") + } + + ctx := context.Background() + + // Create S3 client + s3Client, err := storage.NewS3Client( + ctx, + "test", + "test", + "us-east-1", + ) + if err != nil { + t.Fatalf("Failed to create S3 client: %v", err) + } + + // Create test file + testFile := "/tmp/test-sbom.json" + testContent := []byte(`{"bomFormat":"CycloneDX","specVersion":"1.6"}`) + if err := os.WriteFile(testFile, testContent, 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + defer os.Remove(testFile) + + // Upload + bucket := "test-bucket" + key := "test.json" + + err = s3Client.Upload(ctx, testFile, bucket, key, "cyclonedx") + if err != nil { + t.Fatalf("Failed to upload: %v", err) + } + + // Download and verify + downloadFile := "/tmp/downloaded-sbom.json" + defer os.Remove(downloadFile) + + err = s3Client.Download(ctx, bucket, key, downloadFile) + if err != nil { + t.Fatalf("Failed to download: %v", err) + } + + // Verify content + downloaded, err := os.ReadFile(downloadFile) + if err != nil { + t.Fatalf("Failed to read downloaded file: %v", err) + } + + if string(downloaded) != string(testContent) { + t.Errorf("Downloaded content doesn't match. Got %s, want %s", downloaded, testContent) + } +} diff --git a/internal/validation/sanitize.go b/internal/validation/sanitize.go new file mode 100644 index 0000000..ab8722b --- /dev/null +++ b/internal/validation/sanitize.go @@ -0,0 +1,205 @@ +// Package validation provides functions to sanitize and validate various types of input data. +package validation + +import ( + "fmt" + "regexp" + "strings" + "unicode" +) + +var ( + repoRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$`) + emailRegex = regexp.MustCompile(`^[a-zA-Z0-9._+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`) + s3BucketRegex = regexp.MustCompile(`^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$`) + uuidRegex = regexp.MustCompile(`^[0-9a-fA-F]{8}-?([0-9a-fA-F]{4}-?){3}[0-9a-fA-F]{12}$`) + httpURLRegex = regexp.MustCompile(`^https?://[a-zA-Z0-9][a-zA-Z0-9.-]*(:[0-9]+)?/?$`) +) + +// SanitizeString removes potentially dangerous characters from a string +func SanitizeString(input string, maxLength int) string { + // Remove null bytes and control characters + var result strings.Builder + for _, r := range input { + if r == 0 || r < 32 || r == 127 { + continue + } + if r > 127 { + continue + } + // Remove dangerous characters + if strings.ContainsRune("$(){}|;&<>`@[]", r) { + continue + } + result.WriteRune(r) + } + + sanitized := result.String() + if len(sanitized) > maxLength { + return sanitized[:maxLength] + } + return sanitized +} + +// SanitizeRepository ensures the repository string is in 'owner/repo' format +func SanitizeRepository(repo string) (string, error) { + sanitized := removeChars(repo, `[^a-zA-Z0-9._/-]`) + + if !repoRegex.MatchString(sanitized) { + return "", fmt.Errorf("invalid repository format: %s (must be 'owner/repo')", repo) + } + + return sanitized, nil +} + +// SanitizeEmail ensures the email string is in a valid format +func SanitizeEmail(email string) (string, error) { + // Remove control characters and newlines + sanitized := strings.TrimSpace(email) + sanitized = strings.ReplaceAll(sanitized, "\n", "") + sanitized = strings.ReplaceAll(sanitized, "\r", "") + sanitized = strings.ReplaceAll(sanitized, "\t", "") + + // 
Remove dangerous characters but keep email-valid ones + sanitized = removeChars(sanitized, `[^a-zA-Z0-9@._+-]`) + + if !emailRegex.MatchString(sanitized) { + return "", fmt.Errorf("invalid email format: %s", email) + } + + return sanitized, nil +} + +// SanitizeS3Bucket ensures the S3 bucket string is in a valid format +func SanitizeS3Bucket(bucket string) (string, error) { + // Convert to lowercase + sanitized := strings.ToLower(bucket) + + // Remove invalid characters + sanitized = removeChars(sanitized, `[^a-z0-9.-]`) + + if !s3BucketRegex.MatchString(sanitized) { + return "", fmt.Errorf("invalid S3 bucket name: %s", bucket) + } + + // Check for IP-like format + if regexp.MustCompile(`^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$`).MatchString(sanitized) { + return "", fmt.Errorf("S3 bucket name cannot be IP address format: %s", bucket) + } + + return sanitized, nil +} + +// SanitizeS3Key ensures the S3 key string is in a valid format +func SanitizeS3Key(key string) (string, error) { + // Remove dangerous characters + sanitized := removeChars(key, `[^a-zA-Z0-9._/-]`) + + // Remove path traversal + sanitized = strings.ReplaceAll(sanitized, "..", "") + + // Remove multiple slashes + for strings.Contains(sanitized, "//") { + sanitized = strings.ReplaceAll(sanitized, "//", "/") + } + + // Remove leading/trailing slashes + sanitized = strings.Trim(sanitized, "/") + + if sanitized == "" { + return "", fmt.Errorf("invalid S3 key: cannot be empty") + } + + return sanitized, nil +} + +// SanitizeURL ensures the URL string is in a valid format +func SanitizeURL(url, urlType string) (string, error) { + // Remove control characters + sanitized := removeControlChars(url) + + var valid bool + switch urlType { + case "mend", "wiz": + // Must be HTTPS + valid = strings.HasPrefix(sanitized, "https://") && + regexp.MustCompile(`^https://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/.*)?$`).MatchString(sanitized) + case "clickhouse": + valid = httpURLRegex.MatchString(sanitized) + default: + valid = httpURLRegex.MatchString(sanitized) + } + + if !valid { + return "", fmt.Errorf("invalid %s URL format: %s", urlType, url) + } + + return sanitized, nil +} + +// SanitizeUUID ensures the UUID string is in a valid format +func SanitizeUUID(uuid, fieldName string) (string, error) { + sanitized := removeChars(uuid, `[^a-fA-F0-9-]`) + + if !uuidRegex.MatchString(sanitized) { + return "", fmt.Errorf("invalid UUID format for %s: %s", fieldName, uuid) + } + + return sanitized, nil +} + +// SanitizeNumeric ensures the numeric string is in a valid format +func SanitizeNumeric(value string, fieldName string, minimum, maximum int) (int, error) { + sanitized := removeChars(value, `[^0-9]`) + + if sanitized == "" { + return 0, fmt.Errorf("invalid numeric value for %s: %s", fieldName, value) + } + + var num int + _, err := fmt.Sscanf(sanitized, "%d", &num) + if err != nil { + return 0, fmt.Errorf("invalid numeric value for %s: %s", fieldName, value) + } + + if num < minimum || num > maximum { + return 0, fmt.Errorf("numeric value for %s out of range (%d-%d): %d", fieldName, minimum, maximum, num) + } + + return num, nil +} + +// SanitizePatterns ensures the patterns string is in a valid format +func SanitizePatterns(patterns string) string { + if patterns == "" { + return "" + } + + parts := strings.Split(patterns, ",") + var sanitized []string + + for _, pattern := range parts { + pattern = strings.TrimSpace(pattern) + pattern = removeChars(pattern, `[^a-zA-Z0-9.*_-]`) + if pattern != "" { + sanitized = append(sanitized, pattern) + } + } + + return 
strings.Join(sanitized, ",") +} + +func removeChars(s, pattern string) string { + re := regexp.MustCompile(pattern) + return re.ReplaceAllString(s, "") +} + +func removeControlChars(s string) string { + var result strings.Builder + for _, r := range s { + if !unicode.IsControl(r) { + result.WriteRune(r) + } + } + return result.String() +} diff --git a/internal/validation/sanitize_test.go b/internal/validation/sanitize_test.go new file mode 100644 index 0000000..575c933 --- /dev/null +++ b/internal/validation/sanitize_test.go @@ -0,0 +1,158 @@ +package validation + +import ( + "testing" +) + +func TestSanitizeRepository(t *testing.T) { + tests := []struct { + name string + input string + want string + wantErr bool + }{ + { + name: "valid repository", + input: "owner/repo", + want: "owner/repo", + wantErr: false, + }, + { + name: "repository with hyphens", + input: "my-org/my-repo", + want: "my-org/my-repo", + wantErr: false, + }, + { + name: "repository with dots", + input: "my.org/repo.name", + want: "my.org/repo.name", + wantErr: false, + }, + { + name: "removes dangerous characters", + input: "owner$bad/repo;rm", + want: "ownerbad/reporm", + wantErr: false, + }, + { + name: "invalid - no slash", + input: "invalidrepo", + want: "", + wantErr: true, + }, + { + name: "invalid - empty owner", + input: "/repo", + want: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := SanitizeRepository(tt.input) + + if (err != nil) != tt.wantErr { + t.Errorf("SanitizeRepository() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if got != tt.want { + t.Errorf("SanitizeRepository() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestSanitizeEmail(t *testing.T) { + tests := []struct { + name string + input string + want string + wantErr bool + }{ + { + name: "valid email", + input: "user@example.com", + want: "user@example.com", + wantErr: false, + }, + { + name: "email with plus", + input: "user+tag@example.com", + want: "user+tag@example.com", + wantErr: false, + }, + { + name: "removes newlines", + input: "user@example.com\n", + want: "user@example.com", + wantErr: false, + }, + { + name: "invalid - no @", + input: "invalid-email", + want: "", + wantErr: true, + }, + { + name: "invalid - no domain", + input: "user@", + want: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := SanitizeEmail(tt.input) + + if (err != nil) != tt.wantErr { + t.Errorf("SanitizeEmail() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if got != tt.want { + t.Errorf("SanitizeEmail() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestSanitizeString(t *testing.T) { + tests := []struct { + name string + input string + maxLength int + want string + }{ + { + name: "removes dangerous characters", + input: "test$command`echo hello`", + maxLength: 1000, + want: "testcommandecho hello", + }, + { + name: "respects length limit", + input: "abcdefghijklmnopqrstuvwxyz", + maxLength: 10, + want: "abcdefghij", + }, + { + name: "removes control characters", + input: "test\x00\x01\x02string", + maxLength: 1000, + want: "teststring", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := SanitizeString(tt.input, tt.maxLength) + if got != tt.want { + t.Errorf("SanitizeString() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/lib/common.sh b/lib/common.sh deleted file mode 100644 index 4aa9a42..0000000 --- a/lib/common.sh +++ /dev/null @@ -1,41 +0,0 @@ 
-#!/bin/bash -# Common utilities used across all modules - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -ORANGE='\033[0;33m' -NC='\033[0m' # No Color - -# Logging functions -log_debug() { - if [[ "${DEBUG:-false}" == "true" ]]; then - echo -e "${ORANGE}[DEBUG]${NC} $1" - fi -} - -log_info() { - echo -e "${BLUE}[INFO]${NC} $1" -} - -log_success() { - echo -e "${GREEN}[SUCCESS]${NC} $1" -} - -log_warning() { - echo -e "${YELLOW}[WARNING]${NC} $1" -} - -log_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -# Cleanup function -cleanup() { - if [[ -n "$temp_dir" && -d "$temp_dir" ]]; then - log_info "Cleaning up temporary files" - rm -rf "$temp_dir" - fi -} diff --git a/lib/github.sh b/lib/github.sh deleted file mode 100644 index 19ee549..0000000 --- a/lib/github.sh +++ /dev/null @@ -1,136 +0,0 @@ -#!/bin/bash -# GitHub SBOM download and processing - -source "$(dirname "${BASH_SOURCE[0]}")/common.sh" - -# Download SBOM from GitHub repository -download_sbom() { - local repo="$1" - local output_file="$2" - local max_attempts=3 - local base_delay=30 - - log_info "Downloading SBOM from $repo" - - # GitHub API URL for SBOM - local api_url="https://api.github.com/repos/$repo/dependency-graph/sbom" - - # Authentication header - local auth_header="Authorization: Bearer $GITHUB_TOKEN" - - for attempt in $(seq 1 $max_attempts); do - # Download SBOM file with optimizations for large files - log_info "Starting SBOM download, attempt $attempt/$max_attempts (may take time for large files)..." - - # Calculate delay for this attempt (exponential backoff) - local delay=$((base_delay * attempt)) - - if curl -L \ - --max-time 600 \ - --connect-timeout 60 \ - --retry 2 \ - --retry-delay 10 \ - --retry-max-time 120 \ - --silent \ - --show-error \ - --compressed \ - -H "Accept: application/vnd.github+json" \ - -H "$auth_header" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "$api_url" \ - -o "$output_file"; then - # Verify the download - if [[ -f "$output_file" && -s "$output_file" ]]; then - local file_size - file_size=$(du -h "$output_file" | cut -f1) - log_success "SBOM downloaded successfully ($file_size) on attempt $attempt" - - # Debug: Show first few lines of downloaded content - log_debug "First 200 characters of downloaded content:" - if [[ "${DEBUG:-false}" == "true" ]]; then - head -c 200 "$output_file" | tr '\n' ' ' | sed 's/[[:space:]]\+/ /g' - echo "" - fi - - # Quick validation that it's JSON - if ! jq . "$output_file" > /dev/null 2>&1; then - log_warning "Downloaded file is not valid JSON on attempt $attempt" - log_error "Content preview:" - head -n 5 "$output_file" || cat "$output_file" - - # If not last attempt, continue to retry - if [[ $attempt -lt $max_attempts ]]; then - log_info "Invalid JSON received, waiting ${delay} seconds before retry..." 
- sleep $delay - continue - else - log_error "Downloaded file is not valid JSON after all attempts" - exit 1 - fi - fi - - # Check if it looks like an error response - if jq -e '.message' "$output_file" > /dev/null 2>&1; then - local error_message - error_message=$(jq -r '.message' "$output_file") - # Check if it's a timeout or generation error that we can retry - if [[ "$error_message" =~ "Request timed out" ]] || [[ "$error_message" =~ "Failed to generate SBOM" ]] || [[ "$error_message" =~ "timeout" ]]; then - log_warning "GitHub SBOM generation timed out on attempt $attempt: $error_message" - - if [[ $attempt -lt $max_attempts ]]; then - log_info "GitHub's SBOM generation timed out, waiting ${delay} seconds before retry..." - sleep $delay - continue - else - log_error "GitHub SBOM generation failed after $max_attempts attempts" - log_error "Final error: $error_message" - log_error "This repository may be too large or complex for GitHub's SBOM generation" - log_error "Possible solutions:" - log_error " - Try again later when GitHub's service load is lower" - log_error " - Consider using alternative SBOM sources (Mend, Wiz)" - log_error " - Break down the repository analysis into smaller components" - exit 1 - fi - else - # Non-retryable error - log_error "GitHub API returned error: $error_message" - exit 1 - fi - fi - - # Success - SBOM downloaded and validated - log_success "SBOM download completed successfully" - return 0 - else - log_error "Downloaded file is empty or missing on attempt $attempt" - if [[ $attempt -lt $max_attempts ]]; then - log_info "Empty file received, waiting ${delay} seconds before retry..." - sleep $delay - continue - else - log_error "Downloaded file is empty or missing after all attempts" - exit 1 - fi - fi - else - local curl_exit_code=$? - log_warning "Curl failed on attempt $attempt with exit code: $curl_exit_code" - - if [[ $attempt -lt $max_attempts ]]; then - log_info "Network request failed, waiting ${delay} seconds before retry..." 
- sleep $delay - continue - else - log_error "Failed to download SBOM file after $max_attempts attempts" - log_error "This could be due to:" - log_error " - Repository is too large for GitHub's SBOM generation (common cause)" - log_error " - GitHub's SBOM service is experiencing high load or issues" - log_error " - Network connectivity problems" - log_error " - Authentication issues with the provided token" - log_error " - Repository doesn't have dependency graph enabled" - log_error " - SBOM feature not available for this repository type" - exit 1 - fi - fi - done -} diff --git a/lib/mend.sh b/lib/mend.sh deleted file mode 100644 index a19f626..0000000 --- a/lib/mend.sh +++ /dev/null @@ -1,392 +0,0 @@ -#!/bin/bash -# Mend API integration for SBOM downloads - -source "$(dirname "${BASH_SOURCE[0]}")/common.sh" - -# Authenticate with Mend API and get JWT token -authenticate_mend() { - log_info "Authenticating with Mend API 3.0" - - # Step 1: Login to get refresh token - log_info "Step 1: Logging in to get refresh token" - local login_payload=$(cat </dev/null) - if [[ -n "$jwt_token" && "$jwt_token" != "null" && "$jwt_token" != "empty" ]]; then - log_success "JWT token obtained via POST with empty body" - MEND_JWT_TOKEN="$jwt_token" - return 0 - fi - else - log_error "Failed to extract refresh token from login response" - log_error "Login response: $login_response" - exit 1 - fi - else - log_error "Failed to parse login response" - log_error "Response: $login_response" - exit 1 - fi - else - log_error "Failed to authenticate with Mend" - log_error "Check your email, org UUID, and user key credentials" - log_error "Response: $login_response" - exit 1 - fi -} - -# Request SBOM export from Mend API 3.0 -request_mend_sbom_export() { - local output_file="$1" - - log_info "Requesting SBOM export from Mend API 3.0" - - # Authenticate first to get JWT token - authenticate_mend - - # Build the request payload - local payload=$(cat </dev/null || echo "unknown") - log_debug "Downloaded file type: $file_type" - - if [[ "$file_type" =~ "Zip archive" ]] || [[ "$file_type" =~ "zip" ]] || head -c 2 "$download_file" | xxd | grep -q "504b"; then - log_info "Downloaded file is a ZIP archive, extracting..." - - # Create extraction directory - local extract_dir="$temp_dir/mend_extract_$report_uuid" - mkdir -p "$extract_dir" - - # Extract the ZIP file - if unzip -q "$download_file" -d "$extract_dir"; then - log_success "ZIP file extracted successfully" - - # Find JSON files in the extracted content - local json_files - json_files=$(find "$extract_dir" -name "*.json" -type f) - - if [[ -n "$json_files" ]]; then - # Use the first JSON file found (should be the SBOM) - local sbom_file - sbom_file=$(echo "$json_files" | head -n 1) - log_info "Found SBOM file: $(basename "$sbom_file")" - - # Copy the extracted JSON to our output file - if cp "$sbom_file" "$output_file"; then - log_success "SBOM extracted and copied successfully" - - # Validate JSON format - if jq . 
"$output_file" > /dev/null 2>&1; then - log_success "Extracted SBOM is valid JSON" - - # Log some basic info about the SBOM - local sbom_info - if sbom_info=$(jq -r '.bomFormat // .spdxVersion // "unknown"' "$output_file" 2>/dev/null); then - log_info "SBOM format detected: $sbom_info" - fi - - # Cleanup - rm -rf "$extract_dir" "$download_file" - return 0 - else - log_error "Extracted file is not valid JSON" - log_error "Content preview:" - head -n 5 "$output_file" - fi - else - log_error "Failed to copy extracted SBOM file" - fi - else - log_error "No JSON files found in extracted ZIP" - log_info "Extracted files:" - find "$extract_dir" -type f | head -10 - fi - - # Cleanup extraction directory - rm -rf "$extract_dir" - else - log_error "Failed to extract ZIP file" - log_error "ZIP file might be corrupted" - fi - - # Cleanup download file - rm -f "$download_file" - else - # Not a ZIP file, try to process as direct JSON - log_info "Downloaded file is not a ZIP archive, processing as direct JSON" - - # Move download to output file - if mv "$download_file" "$output_file"; then - # Validate JSON format - if jq . "$output_file" > /dev/null 2>&1; then - log_success "Downloaded SBOM is valid JSON" - - # Log some basic info about the SBOM - local sbom_info - if sbom_info=$(jq -r '.bomFormat // .spdxVersion // "unknown"' "$output_file" 2>/dev/null); then - log_info "SBOM format detected: $sbom_info" - fi - - return 0 - else - log_error "Downloaded file is not valid JSON" - log_error "Content preview:" - head -n 5 "$output_file" - log_error "File type: $file_type" - fi - else - log_error "Failed to move downloaded file" - fi - fi - else - log_error "Downloaded file is empty or missing" - fi - else - log_warning "Download attempt $attempt failed" - fi - - attempt=$((attempt + 1)) - if [[ $attempt -le $max_attempts ]]; then - log_info "Waiting 10s before retry..." - sleep 10 - fi - done - - log_error "Failed to download Mend SBOM after $max_attempts attempts" - exit 1 -} \ No newline at end of file diff --git a/lib/sanitize.sh b/lib/sanitize.sh deleted file mode 100644 index 5cd9eb0..0000000 --- a/lib/sanitize.sh +++ /dev/null @@ -1,464 +0,0 @@ -#!/bin/bash -# Sanitize input to prevent command injection and other vulnerabilities - -# Sanitize general string inputs - remove potentially dangerous characters -sanitize_string() { - local input="$1" - local max_length="${2:-1000}" # Default max length of 1000 characters - - # Remove null bytes, control characters, and limit length - local sanitized - sanitized=$(echo "$input" | tr -d '\0' | tr -d '\001-\037' | tr -d '\177-\377' | cut -c1-"$max_length") - - # Remove potentially dangerous patterns - sanitized=$(echo "$sanitized" | sed 's/[]$(){}|;&<>@[]//g' | tr -d '`') - - echo "$sanitized" -} - -# Sanitize repository names (owner/repo format) -sanitize_repository() { - local repo="$1" - - # Repository should only contain alphanumeric, hyphens, underscores, dots, and forward slash - local sanitized - sanitized=$(echo "$repo" | sed 's/[^a-zA-Z0-9._/-]//g') - - # Validate format: should be owner/repo - if [[ ! 
"$sanitized" =~ ^[a-zA-Z0-9._-]+/[a-zA-Z0-9._-]+$ ]]; then - log_error "Invalid repository format: $repo" - log_error "Repository must be in 'owner/repo' format with alphanumeric characters, dots, hyphens, and underscores only" - exit 1 - fi - - echo "$sanitized" -} - -# Sanitize URLs - validate format and allowed protocols -sanitize_url() { - local url="$1" - local url_type="${2:-general}" # general, clickhouse, mend, wiz - - # Remove null bytes and control characters - local sanitized - sanitized=$(echo "$url" | tr -d '\0' | tr -d '\001-\037' | tr -d '\177-\377') - - # Validate URL format and allowed protocols - case "$url_type" in - "clickhouse") - if [[ ! "$sanitized" =~ ^https?://[a-zA-Z0-9][a-zA-Z0-9.-]*(:[0-9]+)?/?$ ]]; then - log_error "Invalid ClickHouse URL format: $url" - log_error "ClickHouse URL must be HTTP/HTTPS with valid hostname" - exit 1 - fi - ;; - "mend") - if [[ ! "$sanitized" =~ ^https://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/.*)?$ ]]; then - log_error "Invalid Mend URL format: $url" - log_error "Mend URL must be HTTPS with valid domain" - exit 1 - fi - ;; - "wiz") - if [[ ! "$sanitized" =~ ^https://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/.*)?$ ]]; then - log_error "Invalid Wiz URL format: $url" - log_error "Wiz URL must be HTTPS with valid domain" - exit 1 - fi - ;; - *) - if [[ ! "$sanitized" =~ ^https?://[a-zA-Z0-9][a-zA-Z0-9.-]*(:[0-9]+)?(/.*)?$ ]]; then - log_error "Invalid URL format: $url" - log_error "URL must be HTTP/HTTPS with valid hostname" - exit 1 - fi - ;; - esac - - echo "$sanitized" -} - -# Sanitize S3 bucket names -sanitize_s3_bucket() { - local bucket="$1" - - # S3 bucket names have specific rules - local sanitized - sanitized=$(echo "$bucket" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9.-]//g') - - # Validate S3 bucket naming rules - if [[ ! "$sanitized" =~ ^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$ ]]; then - log_error "Invalid S3 bucket name: $bucket" - log_error "S3 bucket names must be 3-63 characters, lowercase, and contain only letters, numbers, dots, and hyphens" - exit 1 - fi - - # Additional S3 bucket rules - if [[ "$sanitized" == *.* ]]; then - # If contains dots, validate it's not IP-like - if [[ "$sanitized" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then - log_error "S3 bucket name cannot be formatted as IP address: $bucket" - exit 1 - fi - fi - - echo "$sanitized" -} - -# Sanitize S3 key -sanitize_s3_key() { - local key="$1" - - # S3 keys can contain most characters but we'll be restrictive for security - local sanitized - sanitized=$(echo "$key" | sed 's/[^a-zA-Z0-9._/-]//g') - - # Prevent path traversal attempts - sanitized=$(echo "$sanitized" | sed 's/\.\.//g' | sed 's/\/\+/\//g') - - # Remove leading/trailing slashes - sanitized=$(echo "$sanitized" | sed 's/^\/*//' | sed 's/\/*$//') - - if [[ -z "$sanitized" ]]; then - log_error "Invalid S3 key: $key" - log_error "S3 key must contain valid characters and cannot be empty" - exit 1 - fi - - echo "$sanitized" -} - -# Sanitize UUID format (for Mend/Wiz IDs) -sanitize_uuid() { - local uuid="$1" - local field_name="$2" - - # Remove any non-hex characters except hyphens - local sanitized - sanitized=$(echo "$uuid" | sed 's/[^a-fA-F0-9-]//g') - - # Validate UUID format (loose validation - some services use non-standard formats) - if [[ ! 
"$sanitized" =~ ^[0-9a-fA-F]{8}-?([0-9a-fA-F]{4}-?){3}[0-9a-fA-F]{12}$ ]]; then - log_error "Invalid UUID format for $field_name: $uuid" - log_error "UUID must contain only hexadecimal characters and hyphens" - exit 1 - fi - - echo "$sanitized" -} - -# Sanitize email addresses -sanitize_email() { - local email="$1" - - # Handle both literal escape sequences and actual control characters - local sanitized="$email" - - # Remove literal escape sequences - sanitized=$(echo "$sanitized" | sed 's/\\n//g; s/\\r//g; s/\\t//g; s/\\\\//g') - - # Remove actual control characters - sanitized=$(echo "$sanitized" | tr -d '\n\r\t\001-\037\177-\377') - - # Remove other dangerous characters but keep email-valid ones (including +) - sanitized=$(echo "$sanitized" | sed 's/[^a-zA-Z0-9@._+-]//g') - - # Basic email format validation (updated regex to allow + in local part) - if [[ ! "$sanitized" =~ ^[a-zA-Z0-9._+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$ ]]; then - log_error "Invalid email format: $email" - log_error "Email must be in valid format: user@domain.com" - exit 1 - fi - - echo "$sanitized" -} - -# Sanitize database/table names -sanitize_database_name() { - local name="$1" - - # Database names should only contain alphanumeric and underscores - local sanitized - sanitized=$(echo "$name" | sed 's/[^a-zA-Z0-9_]//g' | sed 's/^[0-9]/_&/') - - echo "$sanitized" -} - -# Sanitize comma-separated patterns (for include/exclude) -sanitize_patterns() { - local patterns="$1" - - if [[ -z "$patterns" ]]; then - echo "" - return - fi - - # Split by comma and sanitize each pattern - local sanitized_patterns=() - IFS=',' read -ra pattern_array <<< "$patterns" - - for pattern in "${pattern_array[@]}"; do - # Trim whitespace - pattern=$(echo "$pattern" | xargs) - - # Remove dangerous characters but keep wildcards - local sanitized_pattern - sanitized_pattern=$(echo "$pattern" | sed 's/[^a-zA-Z0-9.*_-]//g') - - if [[ -n "$sanitized_pattern" ]]; then - sanitized_patterns+=("$sanitized_pattern") - fi - done - - # Join back with commas - local result - result=$(IFS=','; echo "${sanitized_patterns[*]}") - echo "$result" -} - -# Sanitize numeric values -sanitize_numeric() { - local value="$1" - local field_name="$2" - local min_val="${3:-0}" - local max_val="${4:-999999}" - - # Remove non-numeric characters - local sanitized - sanitized=$(echo "$value" | sed 's/[^0-9]//g') - - # Validate it's a number - if [[ ! "$sanitized" =~ ^[0-9]+$ ]]; then - log_error "Invalid numeric value for $field_name: $value" - log_error "Value must be a positive integer" - return 1 - fi - - # Convert to integer (removes leading zeros) for range checking - local int_value=$((10#$sanitized)) - local int_min=$((min_val)) - local int_max=$((max_val)) - - # Check range using integer values - if (( int_value < int_min )) || (( int_value > int_max )); then - log_error "Numeric value for $field_name out of range: $int_value" - log_error "Value must be between $int_min and $int_max" - return 1 - fi - - # Return the integer value (without leading zeros) - echo "$int_value" -} - -# Main sanitization function - sanitizes all environment variables -sanitize_inputs() { - log_debug "Sanitizing input parameters..." 
- - # GitHub inputs - if [[ -n "${REPOSITORY:-}" ]]; then - REPOSITORY=$(sanitize_repository "$REPOSITORY") - log_debug "Sanitized REPOSITORY: $REPOSITORY" - fi - - if [[ -n "${GITHUB_TOKEN:-}" ]]; then - GITHUB_TOKEN=$(sanitize_string "$GITHUB_TOKEN" 1000) - log_debug "Sanitized GITHUB_TOKEN: [REDACTED]" - fi - - # Mend inputs - if [[ -n "${MEND_EMAIL:-}" ]]; then - MEND_EMAIL=$(sanitize_email "$MEND_EMAIL") - log_debug "Sanitized MEND_EMAIL: $MEND_EMAIL" - fi - - if [[ -n "${MEND_BASE_URL:-}" ]]; then - MEND_BASE_URL=$(sanitize_url "$MEND_BASE_URL" "mend") - log_debug "Sanitized MEND_BASE_URL: $MEND_BASE_URL" - fi - - if [[ -n "${MEND_ORG_UUID:-}" ]]; then - MEND_ORG_UUID=$(sanitize_uuid "$MEND_ORG_UUID" "MEND_ORG_UUID") - log_debug "Sanitized MEND_ORG_UUID: $MEND_ORG_UUID" - fi - - if [[ -n "${MEND_USER_KEY:-}" ]]; then - MEND_USER_KEY=$(sanitize_string "$MEND_USER_KEY" 500) - log_debug "Sanitized MEND_USER_KEY: [REDACTED]" - fi - - if [[ -n "${MEND_PROJECT_UUID:-}" ]]; then - MEND_PROJECT_UUID=$(sanitize_uuid "$MEND_PROJECT_UUID" "MEND_PROJECT_UUID") - log_debug "Sanitized MEND_PROJECT_UUID: $MEND_PROJECT_UUID" - fi - - if [[ -n "${MEND_PRODUCT_UUID:-}" ]]; then - MEND_PRODUCT_UUID=$(sanitize_uuid "$MEND_PRODUCT_UUID" "MEND_PRODUCT_UUID") - log_debug "Sanitized MEND_PRODUCT_UUID: $MEND_PRODUCT_UUID" - fi - - if [[ -n "${MEND_ORG_SCOPE_UUID:-}" ]]; then - MEND_ORG_SCOPE_UUID=$(sanitize_uuid "$MEND_ORG_SCOPE_UUID" "MEND_ORG_SCOPE_UUID") - log_debug "Sanitized MEND_ORG_SCOPE_UUID: $MEND_ORG_SCOPE_UUID" - fi - - if [[ -n "${MEND_PROJECT_UUIDS:-}" ]]; then - # Split by comma and sanitize each UUID - local sanitized_uuids=() - IFS=',' read -ra uuid_array <<< "$MEND_PROJECT_UUIDS" - for uuid in "${uuid_array[@]}"; do - uuid=$(echo "$uuid" | xargs) # trim whitespace - if [[ -n "$uuid" ]]; then - sanitized_uuids+=($(sanitize_uuid "$uuid" "MEND_PROJECT_UUIDS")) - fi - done - MEND_PROJECT_UUIDS=$(IFS=','; echo "${sanitized_uuids[*]}") - log_debug "Sanitized MEND_PROJECT_UUIDS: $MEND_PROJECT_UUIDS" - fi - - if [[ -n "${MEND_MAX_WAIT_TIME:-}" ]]; then - if ! MEND_MAX_WAIT_TIME=$(sanitize_numeric "$MEND_MAX_WAIT_TIME" "MEND_MAX_WAIT_TIME" 60 7200); then - log_error "Invalid numeric value for MEND_MAX_WAIT_TIME: $MEND_MAX_WAIT_TIME" - exit 1 - fi - log_debug "Sanitized MEND_MAX_WAIT_TIME: $MEND_MAX_WAIT_TIME" - fi - - if [[ -n "${MEND_POLL_INTERVAL:-}" ]]; then - if ! 
MEND_POLL_INTERVAL=$(sanitize_numeric "$MEND_POLL_INTERVAL" "MEND_POLL_INTERVAL" 10 300); then - log_error "Invalid numeric value for MEND_POLL_INTERVAL: $MEND_POLL_INTERVAL" - exit 1 - fi - log_debug "Sanitized MEND_POLL_INTERVAL: $MEND_POLL_INTERVAL" - fi - - # Wiz inputs - if [[ -n "${WIZ_AUTH_ENDPOINT:-}" ]]; then - WIZ_AUTH_ENDPOINT=$(sanitize_url "$WIZ_AUTH_ENDPOINT" "wiz") - log_debug "Sanitized WIZ_AUTH_ENDPOINT: $WIZ_AUTH_ENDPOINT" - fi - - if [[ -n "${WIZ_API_ENDPOINT:-}" ]]; then - WIZ_API_ENDPOINT=$(sanitize_url "$WIZ_API_ENDPOINT" "wiz") - log_debug "Sanitized WIZ_API_ENDPOINT: $WIZ_API_ENDPOINT" - fi - - if [[ -n "${WIZ_CLIENT_ID:-}" ]]; then - WIZ_CLIENT_ID=$(sanitize_string "$WIZ_CLIENT_ID" 200) - log_debug "Sanitized WIZ_CLIENT_ID: [REDACTED]" - fi - - if [[ -n "${WIZ_CLIENT_SECRET:-}" ]]; then - WIZ_CLIENT_SECRET=$(sanitize_string "$WIZ_CLIENT_SECRET" 500) - log_debug "Sanitized WIZ_CLIENT_SECRET: [REDACTED]" - fi - - if [[ -n "${WIZ_REPORT_ID:-}" ]]; then - WIZ_REPORT_ID=$(sanitize_string "$WIZ_REPORT_ID" 200) - log_debug "Sanitized WIZ_REPORT_ID: $WIZ_REPORT_ID" - fi - - # AWS inputs - if [[ -n "${AWS_ACCESS_KEY_ID:-}" ]]; then - AWS_ACCESS_KEY_ID=$(sanitize_string "$AWS_ACCESS_KEY_ID" 100) - log_debug "Sanitized AWS_ACCESS_KEY_ID: [REDACTED]" - fi - - if [[ -n "${AWS_SECRET_ACCESS_KEY:-}" ]]; then - AWS_SECRET_ACCESS_KEY=$(sanitize_string "$AWS_SECRET_ACCESS_KEY" 500) - log_debug "Sanitized AWS_SECRET_ACCESS_KEY: [REDACTED]" - fi - - if [[ -n "${AWS_DEFAULT_REGION:-}" ]]; then - AWS_DEFAULT_REGION=$(sanitize_string "$AWS_DEFAULT_REGION" 50) - log_debug "Sanitized AWS_DEFAULT_REGION: $AWS_DEFAULT_REGION" - fi - - if [[ -n "${S3_BUCKET:-}" ]]; then - S3_BUCKET=$(sanitize_s3_bucket "$S3_BUCKET") - log_debug "Sanitized S3_BUCKET: $S3_BUCKET" - fi - - if [[ -n "${S3_KEY:-}" ]]; then - S3_KEY=$(sanitize_s3_key "$S3_KEY") - log_debug "Sanitized S3_KEY: $S3_KEY" - fi - - # ClickHouse inputs - if [[ -n "${CLICKHOUSE_URL:-}" ]]; then - CLICKHOUSE_URL=$(sanitize_url "$CLICKHOUSE_URL" "clickhouse") - log_debug "Sanitized CLICKHOUSE_URL: $CLICKHOUSE_URL" - fi - - if [[ -n "${CLICKHOUSE_DATABASE:-}" ]]; then - CLICKHOUSE_DATABASE=$(sanitize_database_name "$CLICKHOUSE_DATABASE") - log_debug "Sanitized CLICKHOUSE_DATABASE: $CLICKHOUSE_DATABASE" - fi - - if [[ -n "${CLICKHOUSE_USERNAME:-}" ]]; then - CLICKHOUSE_USERNAME=$(sanitize_string "$CLICKHOUSE_USERNAME" 100) - log_debug "Sanitized CLICKHOUSE_USERNAME: $CLICKHOUSE_USERNAME" - fi - - if [[ -n "${CLICKHOUSE_PASSWORD:-}" ]]; then - CLICKHOUSE_PASSWORD=$(sanitize_string "$CLICKHOUSE_PASSWORD" 500) - log_debug "Sanitized CLICKHOUSE_PASSWORD: [REDACTED]" - fi - - if [[ -n "${TRUNCATE_TABLE:-}" ]]; then - if [[ ! "$TRUNCATE_TABLE" =~ ^(true|false)$ ]]; then - log_error "Invalid TRUNCATE_TABLE value: $TRUNCATE_TABLE" - log_error "TRUNCATE_TABLE must be either 'true' or 'false'" - exit 1 - fi - log_debug "Validated TRUNCATE_TABLE: $TRUNCATE_TABLE" - fi - - # General inputs - if [[ -n "${SBOM_SOURCE:-}" ]]; then - if [[ ! "$SBOM_SOURCE" =~ ^(github|mend|wiz)$ ]]; then - log_error "Invalid SBOM_SOURCE: $SBOM_SOURCE" - log_error "SBOM_SOURCE must be one of: github, mend, wiz" - exit 1 - fi - log_debug "Validated SBOM_SOURCE: $SBOM_SOURCE" - fi - - if [[ -n "${SBOM_FORMAT:-}" ]]; then - if [[ ! 
"$SBOM_FORMAT" =~ ^(cyclonedx|spdxjson)$ ]]; then - log_error "Invalid SBOM_FORMAT: $SBOM_FORMAT" - log_error "SBOM_FORMAT must be one of: cyclonedx, spdxjson" - exit 1 - fi - log_debug "Validated SBOM_FORMAT: $SBOM_FORMAT" - fi - - if [[ -n "${MERGE:-}" ]]; then - if [[ ! "$MERGE" =~ ^(true|false)$ ]]; then - log_error "Invalid MERGE value: $MERGE" - log_error "MERGE must be either 'true' or 'false'" - exit 1 - fi - log_debug "Validated MERGE: $MERGE" - fi - - if [[ -n "${INCLUDE:-}" ]]; then - INCLUDE=$(sanitize_patterns "$INCLUDE") - log_debug "Sanitized INCLUDE: $INCLUDE" - fi - - if [[ -n "${EXCLUDE:-}" ]]; then - EXCLUDE=$(sanitize_patterns "$EXCLUDE") - log_debug "Sanitized EXCLUDE: $EXCLUDE" - fi - - if [[ -n "${DEBUG:-}" ]]; then - if [[ ! "$DEBUG" =~ ^(true|false)$ ]]; then - log_error "Invalid DEBUG value: $DEBUG" - log_error "DEBUG must be either 'true' or 'false'" - exit 1 - fi - log_debug "Validated DEBUG: $DEBUG" - fi - - log_success "Input sanitization completed successfully" -} \ No newline at end of file diff --git a/lib/sbom-merging.sh b/lib/sbom-merging.sh deleted file mode 100644 index 639e409..0000000 --- a/lib/sbom-merging.sh +++ /dev/null @@ -1,533 +0,0 @@ -#!/bin/bash -# SBOM merging functionality - -source "$(dirname "${BASH_SOURCE[0]}")/common.sh" - -# Function to check if a filename matches any pattern in a list -matches_pattern() { - local filename="$1" - local patterns="$2" - - # If no patterns provided, return false (no match) - if [[ -z "$patterns" ]]; then - return 1 - fi - - # Split patterns by comma and check each one - IFS=',' read -ra pattern_array <<< "$patterns" - for pattern in "${pattern_array[@]}"; do - # Trim whitespace - pattern=$(echo "$pattern" | xargs) - - # Check if filename matches the pattern using bash pattern matching - if [[ "$filename" == $pattern ]]; then - return 0 - fi - done - - return 1 -} - -# Function to filter files based on include/exclude patterns -filter_files() { - local files_input="$1" - local include_patterns="${INCLUDE:-}" - local exclude_patterns="${EXCLUDE:-}" - - local filtered_files="" - - # Process each file - while IFS= read -r file; do - # Skip empty lines - [[ -z "$file" ]] && continue - - local filename=$(basename "$file") - local should_include=true - - # If include patterns are specified, file must match at least one include pattern - if [[ -n "$include_patterns" ]]; then - if matches_pattern "$filename" "$include_patterns"; then - should_include=true - else - should_include=false - fi - else - # No include patterns specified, so include all files by default - should_include=true - fi - - # If exclude patterns are specified and file matches, exclude it - if [[ "$should_include" == "true" && -n "$exclude_patterns" ]]; then - if matches_pattern "$filename" "$exclude_patterns"; then - should_include=false - fi - fi - - # Add to filtered list if it should be included - if [[ "$should_include" == "true" ]]; then - if [[ -n "$filtered_files" ]]; then - filtered_files="$filtered_files"$'\n'"$file" - else - filtered_files="$file" - fi - fi - done <<< "$files_input" - - echo "$filtered_files" -} - -# Download all CycloneDX SBOMs from S3 bucket and merge them -merge_cyclonedx_sboms() { - local output_file="$1" - - log_info "Merging all CycloneDX SBOMs from S3 bucket: $S3_BUCKET with source tracking" - - # Log include/exclude patterns if specified - if [[ -n "${INCLUDE:-}" ]]; then - log_info "Include patterns: ${INCLUDE}" - fi - if [[ -n "${EXCLUDE:-}" ]]; then - log_info "Exclude patterns: ${EXCLUDE}" - fi - - # Create 
temporary directory for downloaded SBOMs - local download_dir="$temp_dir/sboms" - mkdir -p "$download_dir" - - # List all JSON files in the S3 bucket - local s3_files - - # Debug: Show raw S3 ls output - log_debug "Raw S3 listing for bucket: $S3_BUCKET" - if [[ "${DEBUG:-false}" == "true" ]]; then - if ! aws s3 ls "s3://$S3_BUCKET" --recursive; then - log_error "Failed to list files in S3 bucket: $S3_BUCKET" - log_error "Check bucket name and AWS permissions" - exit 1 - fi - fi - - # Extract JSON files - log_info "Extracting JSON file paths..." - - # Debug: Show the filtering process step by step - local all_files - all_files=$(aws s3 ls "s3://$S3_BUCKET" --recursive | awk '{print $4}' || true) - log_info "All files found: $(echo "$all_files" | wc -l) files" - - local json_files - json_files=$(echo "$all_files" | grep '\.json$' || true) - log_info "JSON files found: $(echo "$json_files" | wc -l) files" - - # Also exclude the target S3_KEY file to avoid processing the merged output - local s3_key_basename=$(basename "${S3_KEY:-sbom.json}") - s3_files=$(echo "$json_files" | grep -v "^${s3_key_basename}$" || true) - log_info "JSON files after excluding target file ($s3_key_basename): $(echo "$s3_files" | wc -l) files" - - # Apply include/exclude filters - if [[ -n "${INCLUDE:-}" ]] || [[ -n "${EXCLUDE:-}" ]]; then - log_info "Applying include/exclude filters..." - s3_files=$(filter_files "$s3_files") - log_info "Files after filtering: $(echo "$s3_files" | wc -l) files" - fi - - # Debug: Show what files we're going to process - log_info "Files to process:" - echo "$s3_files" | while IFS= read -r file; do - [[ -n "$file" ]] && log_info " - $file" - done - - if [[ -z "$s3_files" ]] || [[ "$(echo "$s3_files" | wc -l)" -eq 0 ]]; then - log_error "No JSON files found in S3 bucket after filtering" - log_error "Check your include/exclude patterns and ensure there are valid files" - if [[ -n "${INCLUDE:-}" ]]; then - log_error "Include patterns: ${INCLUDE}" - fi - if [[ -n "${EXCLUDE:-}" ]]; then - log_error "Exclude patterns: ${EXCLUDE}" - fi - exit 1 - fi - - # Download and validate CycloneDX SBOMs with source tracking - local cyclonedx_files=() - local source_references=() # Parallel array to track source references - local file_count=0 - local total_files=0 - - log_info "Starting download loop with source tracking..." - - local files_array=() - while IFS= read -r line; do - [[ -n "$line" ]] && files_array+=("$line") - done <<< "$s3_files" - - log_info "Processing ${#files_array[@]} files with source extraction..." 
- - for s3_key_to_merge in "${files_array[@]}"; do - log_debug "Processing file: '$s3_key_to_merge'" - - # Skip empty entries - if [[ -z "$s3_key_to_merge" ]]; then - log_debug "Skipping empty s3_key_to_merge" - continue - fi - - # Safely increment counter - total_files=$((total_files + 1)) - - local filename - filename=$(basename "$s3_key_to_merge" 2>/dev/null) || { - log_warning "Failed to get basename for: $s3_key_to_merge" - continue - } - - local local_file="$download_dir/${filename}" - - log_debug "Downloading ($total_files/${#files_array[@]}): s3://$S3_BUCKET/$s3_key_to_merge" - - # Try to download the file - if aws s3 cp "s3://$S3_BUCKET/$s3_key_to_merge" "$local_file"; then - log_success "Downloaded: $filename" - - # Check if it's a valid CycloneDX SBOM - log_debug "Validating CycloneDX format for: $filename" - - # First check if it's valid JSON - if jq empty "$local_file" >/dev/null 2>&1; then - log_debug "JSON validation passed for: $filename" - else - log_warning "Skipping $filename - not valid JSON" - continue - fi - - # Check if it has bomFormat field or CycloneDX structure - local bom_format - bom_format=$(jq -r '.bomFormat // "missing"' "$local_file" 2>/dev/null) - - # Handle jq failure - if [[ $? -ne 0 ]]; then - log_warning "Failed to read bomFormat from $filename" - bom_format="missing" - fi - - log_debug "File $filename has bomFormat: $bom_format" - - # Check if it's CycloneDX (also check for metadata.component as backup) - local is_cyclonedx=false - - if [[ "$bom_format" == "CycloneDX" ]]; then - is_cyclonedx=true - elif jq -e '.metadata.component' "$local_file" >/dev/null 2>&1; then - is_cyclonedx=true - log_info "Detected CycloneDX via metadata.component field" - fi - - if [[ "$is_cyclonedx" == "true" ]]; then - # Extract source reference before adding to processing list - local source_ref - source_ref=$(extract_sbom_source_reference "$local_file" "$filename") - - cyclonedx_files+=("$local_file") - source_references+=("$source_ref") - file_count=$((file_count + 1)) - log_success "Valid CycloneDX SBOM: $filename (source: $source_ref)" - else - log_warning "Skipping $filename - bomFormat is '$bom_format', not 'CycloneDX'" - - # Debug: Show structure of the file to understand why it's not recognized - log_debug "File structure preview for $filename:" - if [[ "${DEBUG:-false}" == "true" ]]; then - if jq -r 'keys[]' "$local_file" 2>/dev/null | head -5; then - echo "Keys shown above" - else - echo "Unable to read keys from file" - fi - fi - fi - else - log_error "Failed to download: s3://$S3_BUCKET/$s3_key_to_merge" - log_error "AWS CLI exit code: $?" 
- continue - fi - done - - log_info "Downloaded $total_files files, found $file_count valid CycloneDX SBOMs" - - if [[ $file_count -eq 0 ]]; then - log_error "No valid CycloneDX SBOMs found in S3 bucket after filtering" - log_error "Check that your S3 bucket contains CycloneDX format SBOMs" - log_error "and that your include/exclude patterns are correct" - - # Show what files were actually downloaded for debugging - log_info "Files that were downloaded but rejected:" - for file in "$download_dir"/*; do - if [[ -f "$file" ]]; then - local fname=$(basename "$file") - local format_info - format_info=$(jq -r '.bomFormat // .spdxVersion // "unknown_format"' "$file" 2>/dev/null) || format_info="invalid_json" - log_info "- $fname: $format_info" - fi - done - exit 1 - fi - - log_info "Found $file_count CycloneDX SBOMs to merge with source tracking" - - # Create the merged SBOM structure - log_info "Creating merged CycloneDX SBOM with source tracking..." - - # Start with a proper CycloneDX template - local merged_metadata - merged_metadata=$(cat <<'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "serialNumber": "", - "version": 1, - "metadata": { - "timestamp": "", - "tools": [{ - "vendor": "ClickBOM", - "name": "cyclonedx-merge", - "version": "1.0.10" - }], - "component": { - "type": "application", - "name": "merged-sbom", - "version": "1.0.0" - } - }, - "components": [] -} -EOF -) - - # Generate a UUID-like serial number and timestamp - local timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - local serial_number - if command -v uuidgen >/dev/null 2>&1; then - serial_number="urn:uuid:$(uuidgen)" - else - serial_number="urn:uuid:$(openssl rand -hex 16 | sed 's/\(.{8}\)\(.{4}\)\(.{4}\)\(.{4}\)\(.{12}\)/\1-\2-\3-\4-\5/')" - fi - - # Update metadata with actual values - merged_metadata=$(echo "$merged_metadata" | jq --arg ts "$timestamp" --arg sn "$serial_number" ' - .metadata.timestamp = $ts | - .serialNumber = $sn - ') - - # Collect all components from all SBOMs with source tracking - log_info "Collecting components from all SBOMs..." - local all_components="$temp_dir/all_components.json" - - # Initialize empty array - echo "[]" > "$all_components" - - local i=0 - - # Collect all components with source information - for i in "${!cyclonedx_files[@]}"; do - local sbom_file="${cyclonedx_files[i]}" - local source_ref="${source_references[i]}" - - local component_count - component_count=$(jq '.components | length' "$sbom_file" 2>/dev/null) || component_count=0 - log_info "Processing $(basename "$sbom_file"): $component_count components (source: $source_ref)" - - # Extract components with source tracking - if [[ "$component_count" -gt 0 ]]; then - local temp_components="$temp_dir/temp_components_$(basename "$sbom_file").json" - if collect_components_with_source "$sbom_file" "$source_ref" "$temp_components"; then - # Merge with existing components - if jq -s 'flatten' "$all_components" "$temp_components" > "$temp_dir/merged_temp.json"; then - mv "$temp_dir/merged_temp.json" "$all_components" - else - log_warning "Failed to merge components from $(basename "$sbom_file")" - fi - else - log_warning "Failed to extract components from $(basename "$sbom_file")" - fi - fi - done - - # Remove duplicates based on name+version+purl combination - log_info "Removing duplicate components (preserving source information)..." 
- local unique_components="$temp_dir/unique_components.json" - if jq 'unique_by((.name // "unknown") + "@" + (.version // "unknown") + "#" + (.purl // "") + "^" + (.source // "unknown"))' "$all_components" > "$unique_components"; then - log_success "Deduplication completed with source preservation" - else - log_error "Failed to deduplicate components" - exit 1 - fi - - # Create final merged SBOM - log_info "Assembling final merged SBOM with source tracking..." - if echo "$merged_metadata" | jq --slurpfile comps "$unique_components" '. + {components: $comps[0]}' > "$output_file"; then - log_success "Final SBOM assembled with source tracking" - else - log_error "Failed to assemble final SBOM" - exit 1 - fi - - # Validate the merged SBOM - if ! jq . "$output_file" > /dev/null 2>&1; then - log_error "Generated merged SBOM is not valid JSON" - exit 1 - fi - - # Final validation that it's proper CycloneDX - if ! jq -e '.bomFormat == "CycloneDX"' "$output_file" > /dev/null 2>&1; then - log_error "Generated merged SBOM does not have proper CycloneDX format" - exit 1 - fi - - local component_count - component_count=$(jq '.components | length' "$output_file") - - log_success "Successfully merged $file_count SBOMs into one with $component_count unique components (with source tracking)" - - # Show a summary of what was merged with source information - log_info "Merge summary with source tracking:" - for i in "${!cyclonedx_files[@]}"; do - local sbom_file="${cyclonedx_files[i]}" - local source_ref="${source_references[i]}" - local fname=$(basename "$sbom_file" .json) - local comp_count - comp_count=$(jq '.components | length' "$sbom_file" 2>/dev/null) || comp_count=0 - log_info " - $fname: $comp_count components (source: $source_ref)" - done -} - -# Merge multiple local CycloneDX SBOMs into one -merge_local_cyclonedx_sboms() { - local output_file="${!#}" # Last argument is the output file - local input_files=("${@:1:$#-1}") # All arguments except the last one - - log_info "Merging ${#input_files[@]} local CycloneDX SBOMs with source tracking" - - # Create the merged SBOM structure - log_info "Creating merged CycloneDX SBOM with source tracking..." - - # Start with a proper CycloneDX template - local merged_metadata - merged_metadata=$(cat <<'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "serialNumber": "", - "version": 1, - "metadata": { - "timestamp": "", - "tools": [{ - "vendor": "ClickBOM", - "name": "cyclonedx-merge", - "version": "1.0.10" - }], - "component": { - "type": "application", - "name": "wiz-merged-sbom", - "version": "1.0.0" - } - }, - "components": [] -} -EOF -) - - # Generate a UUID-like serial number and timestamp - local timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - local serial_number - if command -v uuidgen >/dev/null 2>&1; then - serial_number="urn:uuid:$(uuidgen)" - else - serial_number="urn:uuid:$(openssl rand -hex 16 | sed 's/\(.{8}\)\(.{4}\)\(.{4}\)\(.{4}\)\(.{12}\)/\1-\2-\3-\4-\5/')" - fi - - # Update metadata with actual values - merged_metadata=$(echo "$merged_metadata" | jq --arg ts "$timestamp" --arg sn "$serial_number" ' - .metadata.timestamp = $ts | - .serialNumber = $sn - ') - - # Collect all components from all SBOMs with source tracking - log_info "Collecting components from all SBOMs with source tracking..." 
- local all_components="$temp_dir/wiz_all_components.json" - - # Initialize empty array - echo "[]" > "$all_components" - - # Collect all components with source tracking from input files - for sbom_file in "${input_files[@]}"; do - # Extract source reference for this SBOM - local source_ref - source_ref=$(extract_sbom_source_reference "$sbom_file" "$(basename "$sbom_file")") - - local component_count - component_count=$(jq '.components | length' "$sbom_file" 2>/dev/null) || component_count=0 - log_info "Processing $(basename "$sbom_file"): $component_count components (source: $source_ref)" - - # Extract components with source tracking - if [[ "$component_count" -gt 0 ]]; then - local temp_components="$temp_dir/wiz_temp_components_$(basename "$sbom_file").json" - if collect_components_with_source "$sbom_file" "$source_ref" "$temp_components"; then - # Merge with existing components - if jq -s 'flatten' "$all_components" "$temp_components" > "$temp_dir/wiz_merged_temp.json"; then - mv "$temp_dir/wiz_merged_temp.json" "$all_components" - else - log_warning "Failed to merge components from $(basename "$sbom_file")" - fi - else - log_warning "Failed to extract components from $(basename "$sbom_file")" - fi - fi - done - - # Remove duplicates based on name+version+purl+source combination - log_info "Removing duplicate components (preserving source information)..." - local unique_components="$temp_dir/wiz_unique_components.json" - if jq 'unique_by((.name // "unknown") + "@" + (.version // "unknown") + "#" + (.purl // "") + "^" + (.source // "unknown"))' "$all_components" > "$unique_components"; then - log_success "Deduplication completed with source preservation" - else - log_error "Failed to deduplicate components" - exit 1 - fi - - # Create final merged SBOM - log_info "Assembling final merged SBOM with source tracking..." - if echo "$merged_metadata" | jq --slurpfile comps "$unique_components" '. + {components: $comps[0]}' > "$output_file"; then - log_success "Final SBOM assembled with source tracking" - else - log_error "Failed to assemble final SBOM" - exit 1 - fi - - # Validate the merged SBOM - if ! jq . "$output_file" > /dev/null 2>&1; then - log_error "Generated merged SBOM is not valid JSON" - exit 1 - fi - - # Final validation that it's proper CycloneDX - if ! 
jq -e '.bomFormat == "CycloneDX"' "$output_file" > /dev/null 2>&1; then - log_error "Generated merged SBOM does not have proper CycloneDX format" - exit 1 - fi - - local component_count - component_count=$(jq '.components | length' "$output_file") - - log_success "Successfully merged ${#input_files[@]} SBOMs into one with $component_count unique components (with source tracking)" - - # Show a summary of what was merged with source information - log_info "Merge summary with source tracking:" - for sbom_file in "${input_files[@]}"; do - local source_ref - source_ref=$(extract_sbom_source_reference "$sbom_file" "$(basename "$sbom_file")") - local fname=$(basename "$sbom_file" .json) - local comp_count - comp_count=$(jq '.components | length' "$sbom_file" 2>/dev/null) || comp_count=0 - log_info " - $fname: $comp_count components (source: $source_ref)" - done -} diff --git a/lib/sbom-processing.sh b/lib/sbom-processing.sh deleted file mode 100644 index 2f3378d..0000000 --- a/lib/sbom-processing.sh +++ /dev/null @@ -1,152 +0,0 @@ -#!/bin/bash -# SBOM format detection, conversion, and processing - -source "$(dirname "${BASH_SOURCE[0]}")/common.sh" - -# Fix SPDX compatibility issues for CycloneDX conversion -fix_spdx_compatibility() { - local input_file="$1" - local output_file="$2" - - log_info "Fixing SPDX compatibility issues for CycloneDX conversion" - - # Fix referenceCategory values that CycloneDX doesn't recognize - # Based on SPDX 2.2 spec, valid values are: SECURITY, PACKAGE_MANAGER, PERSISTENT_ID, OTHER - if jq ' - walk( - if type == "object" and has("referenceCategory") then - .referenceCategory = ( - if .referenceCategory == "PACKAGE-MANAGER" then "PACKAGE_MANAGER" - elif .referenceCategory == "SECURITY" then "SECURITY" - elif .referenceCategory == "PERSISTENT_ID" then "PERSISTENT_ID" - elif .referenceCategory == "OTHER" then "OTHER" - else "OTHER" - end - ) - else . - end - ) - ' "$input_file" > "$output_file"; then - log_success "SPDX compatibility fixes applied" - else - log_error "Failed to apply SPDX compatibility fixes" - exit 1 - fi -} - -# Extract SBOM from wrapper if needed -extract_sbom_from_wrapper() { - local input_file="$1" - local output_file="$2" - - # Check if the file has the .sbom wrapper structure - if jq -e '.sbom' "$input_file" > /dev/null 2>&1; then - log_info "Detected SBOM wrapper, extracting nested SBOM" - if jq '.sbom' "$input_file" > "$output_file"; then - log_success "SBOM extracted from wrapper" - else - log_error "Failed to extract SBOM from wrapper" - exit 1 - fi - else - log_info "No wrapper detected, using SBOM as-is" - cp "$input_file" "$output_file" - fi -} - -# Detect SBOM format -detect_sbom_format() { - local sbom_file="$1" - - if ! 
[[ -f "$sbom_file" ]]; then - log_error "SBOM file not found: $sbom_file" - exit 1 - fi - - # Check if it's already CycloneDX format - if jq -e '.bomFormat // .metadata.component' "$sbom_file" > /dev/null 2>&1; then - local format - format=$(jq -r '.bomFormat // "cyclonedx"' "$sbom_file" 2>/dev/null || echo "unknown") - - if [[ "$format" == "CycloneDX" ]] || jq -e '.metadata.component' "$sbom_file" > /dev/null 2>&1; then - echo "cyclonedx" - return - fi - fi - - # Check if it's SPDX format - if jq -e '.spdxVersion // .SPDXID' "$sbom_file" > /dev/null 2>&1; then - echo "spdxjson" - return - fi - - # Check if it's SWID format (basic check) - if jq -e '.SoftwareIdentity' "$sbom_file" > /dev/null 2>&1; then - echo "swid" - return - fi - - log_warning "Unable to detect SBOM format, assuming SPDX" - echo "spdxjson" -} - -# Convert SBOM to desired format -convert_sbom() { - local input_file="$1" - local output_file="$2" - local detected_format="$3" - local desired_format="$4" - - # If no desired format specified, keep original - if [[ -z "$desired_format" ]]; then - log_info "No format conversion requested, keeping original format ($detected_format)" - cp "$input_file" "$output_file" - return - fi - - # Normalize format names for comparison - local detected_lower=$(echo "$detected_format" | tr '[:upper:]' '[:lower:]') - local desired_lower=$(echo "$desired_format" | tr '[:upper:]' '[:lower:]') - - # Map detected format to CLI input format - local cli_input_format="$detected_format" - case "$detected_lower" in - "spdxjson") cli_input_format="spdxjson" ;; - "cyclonedx") cli_input_format="json" ;; - *) cli_input_format="autodetect" ;; - esac - - # If already in desired format, no conversion needed - if [[ "$detected_lower" == "$desired_lower" ]]; then - log_info "SBOM is already in the desired format ($desired_format)" - cp "$input_file" "$output_file" - return - fi - - # Perform conversion based on desired format - case "$desired_lower" in - "cyclonedx") - log_info "Converting $detected_format SBOM to CycloneDX format" - if cyclonedx convert --input-file "$input_file" --input-format "$cli_input_format" --output-version v1_6 --output-file "$output_file" --output-format json; then - log_success "SBOM converted to CycloneDX format" - else - log_error "Failed to convert SBOM to CycloneDX format" - exit 1 - fi - ;; - "spdxjson") - log_info "Converting $detected_format SBOM to SPDX format" - if cyclonedx convert --input-file "$input_file" --input-format "$cli_input_format" --output-file "$output_file" --output-format spdxjson; then - log_success "SBOM converted to SPDX format" - else - log_error "Failed to convert SBOM to SPDX format" - exit 1 - fi - ;; - *) - log_error "Unsupported target format: $desired_format" - log_error "Supported formats: cyclonedx, spdxjson" - exit 1 - ;; - esac -} diff --git a/lib/validation.sh b/lib/validation.sh deleted file mode 100644 index 397326d..0000000 --- a/lib/validation.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -# Environment validation functions - -source "$(dirname "${BASH_SOURCE[0]}")/common.sh" - -# Validate required environment variables -validate_env() { - local required_vars=("AWS_ACCESS_KEY_ID" "AWS_SECRET_ACCESS_KEY" "S3_BUCKET") - - # Add REPOSITORY requirement only if not in merge mode - if [[ "${MERGE:-false}" != "true" && "${SBOM_SOURCE:-}" != "mend" && "${SBOM_SOURCE:-}" != "wiz" ]]; then - required_vars+=("REPOSITORY") - fi - - for var in "${required_vars[@]}"; do - if [[ -z "${!var:-}" ]]; then - log_error "Required environment variable $var is 
not set" - exit 1 - fi - done - - # Validate ClickHouse configuration if any ClickHouse parameter is provided - if [[ -n "${CLICKHOUSE_URL:-}" ]]; then - local clickhouse_vars=("CLICKHOUSE_URL" "CLICKHOUSE_DATABASE" "CLICKHOUSE_USERNAME") - for var in "${clickhouse_vars[@]}"; do - if [[ -z "${!var:-}" ]]; then - log_error "If using ClickHouse, $var must be provided" - exit 1 - fi - done - log_info "ClickHouse configuration validated" - fi -} - -# Validate Mend environment variables -validate_mend_env() { - if [[ "${SBOM_SOURCE:-}" == "mend" ]]; then - local required_mend_vars=("MEND_EMAIL" "MEND_ORG_UUID" "MEND_USER_KEY" "MEND_BASE_URL") - - for var in "${required_mend_vars[@]}"; do - if [[ -z "${!var:-}" ]]; then - log_error "Required Mend environment variable $var is not set" - exit 1 - fi - done - - # Validate at least one scope is provided - if [[ -z "${MEND_PROJECT_UUID:-}" && -z "${MEND_PRODUCT_UUID:-}" ]]; then - log_error "At least one Mend scope must be provided: MEND_PROJECT_UUID or MEND_PRODUCT_UUID" - exit 1 - fi - - log_info "Mend environment validated" - fi -} - -# Validate Wiz environment variables -validate_wiz_env() { - if [[ "${SBOM_SOURCE:-}" == "wiz" ]]; then - local required_wiz_vars=("WIZ_API_ENDPOINT" "WIZ_CLIENT_ID" "WIZ_CLIENT_SECRET" "WIZ_REPORT_ID") - - for var in "${required_wiz_vars[@]}"; do - if [[ -z "${!var:-}" ]]; then - log_error "Required Wiz environment variable $var is not set" - exit 1 - fi - done - - log_info "Wiz environment validated" - fi -} diff --git a/lib/wiz.sh b/lib/wiz.sh deleted file mode 100644 index 31d0c5e..0000000 --- a/lib/wiz.sh +++ /dev/null @@ -1,326 +0,0 @@ -#!/bin/bash -# Wiz API integration for SBOM downloads - -source "$(dirname "${BASH_SOURCE[0]}")/common.sh" - -# Authenticate with Wiz API and get access token -authenticate_wiz() { - log_info "Authenticating with Wiz API" - - # Get access token - local auth_response - if auth_response=$(curl -s \ - -X POST \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -H "Accept: application/json" \ - --data grant_type=client_credentials \ - --data client_id="$WIZ_CLIENT_ID" \ - --data client_secret="$WIZ_CLIENT_SECRET" \ - --data audience=wiz-api \ - "$WIZ_AUTH_ENDPOINT"); then - - # Extract access token - local access_token - if access_token=$(echo "$auth_response" | jq -r '.access_token // empty'); then - if [[ -n "$access_token" && "$access_token" != "null" ]]; then - log_success "Wiz authentication successful" - WIZ_ACCESS_TOKEN="$access_token" - return 0 - else - log_error "Failed to extract access token from response" - log_error "Response: $auth_response" - exit 1 - fi - else - log_error "Failed to parse authentication response" - log_error "Response: $auth_response" - exit 1 - fi - else - log_error "Failed to authenticate with Wiz API" - log_error "Check your API endpoint, client ID, and client secret" - exit 1 - fi -} - -# Download SBOM report from Wiz using GraphQL -download_wiz_report() { - local output_file="$1" - - log_info "Downloading Wiz report: $WIZ_REPORT_ID" - - # Authenticate first - authenticate_wiz - - # Prepare GraphQL query - local graphql_query=$(cat <<'EOF' -{ - "query": "query ReportDownloadUrl($reportId: ID!) 
{ report(id: $reportId) { lastRun { url } } }", - "variables": { - "reportId": "%s" - } -} -EOF -) - - # Format the query with the actual report ID - local formatted_query - formatted_query=$(printf "$graphql_query" "$WIZ_REPORT_ID") - - log_debug "GraphQL query: $formatted_query" - - # Execute GraphQL query to get download URL - local graphql_response - if graphql_response=$(curl -s \ - -X POST \ - -H "Authorization: Bearer $WIZ_ACCESS_TOKEN" \ - -H "Content-Type: application/json" \ - -H "Accept: application/json" \ - --data "$formatted_query" \ - "$WIZ_API_ENDPOINT/api/graphql"); then - - log_debug "GraphQL response: $graphql_response" - - # Check for GraphQL errors - if echo "$graphql_response" | jq -e '.errors' > /dev/null 2>&1; then - local error_message - error_message=$(echo "$graphql_response" | jq -r '.errors[0].message // "Unknown GraphQL error"') - log_error "Wiz GraphQL error: $error_message" - exit 1 - fi - - # Extract download URL - local download_url - if download_url=$(echo "$graphql_response" | jq -r '.data.report.lastRun.url // empty'); then - if [[ -n "$download_url" && "$download_url" != "null" ]]; then - log_info "Got download URL from Wiz" - log_debug "Download URL: $download_url" - - # Download the report from the URL - download_wiz_report_from_url "$download_url" "$output_file" - return 0 - else - log_error "No download URL found in response" - log_error "This could mean:" - log_error " - Report ID does not exist" - log_error " - Report has no completed runs" - log_error " - Report URL has expired" - log_error "Response: $graphql_response" - exit 1 - fi - else - log_error "Failed to parse GraphQL response" - log_error "Response: $graphql_response" - exit 1 - fi - else - log_error "Failed to execute GraphQL query" - exit 1 - fi -} - -# Download the report from the provided URL -download_wiz_report_from_url() { - local download_url="$1" - local output_file="$2" - - log_info "Downloading Wiz report from URL" - - # Create temporary file for the raw download - local temp_download="$temp_dir/wiz_raw_download" - - # Download the report file from the provided URL - if curl -L \ - --max-time 300 \ - --connect-timeout 30 \ - --retry 3 \ - --retry-delay 5 \ - --silent \ - --show-error \ - -H "Authorization: Bearer $WIZ_ACCESS_TOKEN" \ - -H "Accept: application/json" \ - "$download_url" \ - -o "$temp_download"; then - - # Verify the download - if [[ -f "$temp_download" && -s "$temp_download" ]]; then - local file_size - file_size=$(du -h "$temp_download" | cut -f1) - log_success "Wiz report downloaded successfully ($file_size)" - - # Detect file type and handle compression - local file_type - file_type=$(file -b "$temp_download" 2>/dev/null || echo "unknown") - log_info "Downloaded file type: $file_type" - - # Handle different file types - if [[ "$file_type" =~ "gzip compressed" ]] || [[ "$file_type" =~ "gzip" ]]; then - log_info "File is gzip compressed, decompressing..." - if gunzip -c "$temp_download" > "$output_file"; then - log_success "File decompressed successfully" - else - log_error "Failed to decompress gzip file" - exit 1 - fi - elif [[ "$file_type" =~ "Zip archive" ]] || [[ "$file_type" =~ "zip" ]] || head -c 2 "$temp_download" | xxd | grep -q "504b"; then - log_info "File is ZIP archive, extracting..." 
- - # Create extraction directory - local extract_dir="$temp_dir/wiz_extract" - mkdir -p "$extract_dir" - - # Extract the ZIP file - if unzip -q "$temp_download" -d "$extract_dir"; then - log_success "ZIP file extracted successfully" - - # Debug: Show what was extracted - log_debug "Extracted files:" - if [[ "${DEBUG:-false}" == "true" ]]; then - find "$extract_dir" -type f | while read -r file; do - log_debug " - $(basename "$file") ($(file -b "$file" 2>/dev/null || echo "unknown type"))" - done - fi - - # Find JSON files in the extracted content - local json_files - json_files=$(find "$extract_dir" -name "*.json" -type f) - - if [[ -n "$json_files" ]]; then - local json_count - json_count=$(echo "$json_files" | wc -l) - log_info "Found $json_count JSON files in ZIP archive" - - if [[ $json_count -eq 1 ]]; then - # Single JSON file - just copy it - local json_file - json_file=$(echo "$json_files" | head -1) - log_info "Single JSON file: $(basename "$json_file")" - - if cp "$json_file" "$output_file"; then - log_success "JSON file extracted and copied successfully" - else - log_error "Failed to copy extracted JSON file" - exit 1 - fi - else - # Multiple JSON files - merge them using existing function - log_info "Multiple JSON files found, merging CycloneDX SBOMs..." - - # Validate all are CycloneDX SBOMs - local cyclonedx_files=() - while IFS= read -r json_file; do - if [[ -f "$json_file" ]]; then - # Check if it's valid JSON first - if jq empty "$json_file" >/dev/null 2>&1; then - # Check if it's CycloneDX - local bom_format - bom_format=$(jq -r '.bomFormat // "missing"' "$json_file" 2>/dev/null) - - if [[ "$bom_format" == "CycloneDX" ]] || jq -e '.metadata.component' "$json_file" >/dev/null 2>&1; then - cyclonedx_files+=("$json_file") - log_debug " โœ“ $(basename "$json_file") is valid CycloneDX" - else - log_warning " โš  $(basename "$json_file") is not CycloneDX (format: $bom_format)" - fi - else - log_warning " โš  $(basename "$json_file") is not valid JSON" - fi - fi - done <<< "$json_files" - - if [[ ${#cyclonedx_files[@]} -eq 0 ]]; then - log_error "No valid CycloneDX SBOMs found in ZIP archive" - exit 1 - elif [[ ${#cyclonedx_files[@]} -eq 1 ]]; then - # Only one valid CycloneDX file found - log_info "Only one valid CycloneDX SBOM found, copying it" - if cp "${cyclonedx_files[0]}" "$output_file"; then - log_success "CycloneDX SBOM copied successfully" - else - log_error "Failed to copy CycloneDX SBOM" - exit 1 - fi - else - # Multiple valid CycloneDX files - merge them - log_info "Merging ${#cyclonedx_files[@]} CycloneDX SBOMs..." 
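The deleted wiz.sh logic above treats an extracted file as CycloneDX when its bomFormat field equals "CycloneDX" or it exposes a metadata.component object, and only those files are handed to the merge step that follows. A minimal Go sketch of that same acceptance check, assuming a hypothetical isCycloneDX helper rather than the actual internal/sbom/wiz.go code:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// isCycloneDX reports whether the JSON file at path looks like a CycloneDX SBOM,
// mirroring the shell check above: bomFormat == "CycloneDX" or a non-empty
// metadata.component object. Hypothetical helper, shown for illustration only.
func isCycloneDX(path string) (bool, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return false, err
	}
	var doc struct {
		BomFormat string `json:"bomFormat"`
		Metadata  struct {
			Component map[string]interface{} `json:"component"`
		} `json:"metadata"`
	}
	if err := json.Unmarshal(data, &doc); err != nil {
		return false, err // not valid JSON, skipped like the jq empty check above
	}
	return doc.BomFormat == "CycloneDX" || len(doc.Metadata.Component) > 0, nil
}

func main() {
	ok, err := isCycloneDX("bom.json")
	fmt.Println(ok, err)
}

Files that fail this check would be skipped with a warning, exactly as the shell loop above does before merging.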
- merge_local_cyclonedx_sboms "${cyclonedx_files[@]}" "$output_file" - fi - fi - else - log_error "No JSON files found in extracted ZIP" - log_info "Looking for any files that might be JSON (without .json extension):" - - # Try to find files that might be JSON by content - local potential_json_files=() - while IFS= read -r -d '' file; do - if [[ -f "$file" && -s "$file" ]]; then - # Check if file content looks like JSON - if head -c 1 "$file" | grep -q '[{\[]'; then - potential_json_files+=("$file") - log_info " - $(basename "$file") might be JSON (starts with { or [)" - fi - fi - done < <(find "$extract_dir" -type f -print0) - - if [[ ${#potential_json_files[@]} -gt 0 ]]; then - log_info "Trying first potential JSON file: $(basename "${potential_json_files[0]}")" - if cp "${potential_json_files[0]}" "$output_file"; then - log_success "Potential JSON file copied successfully" - else - log_error "Failed to copy potential JSON file" - exit 1 - fi - else - log_error "No JSON or JSON-like files found in ZIP archive" - log_info "All extracted files:" - find "$extract_dir" -type f -exec basename {} \; | sort - exit 1 - fi - fi - - # Cleanup extraction directory - rm -rf "$extract_dir" - else - log_error "Failed to extract ZIP file" - exit 1 - fi - else - # Assume it's already a JSON file - log_info "File appears to be uncompressed, copying as-is..." - if cp "$temp_download" "$output_file"; then - log_success "File copied successfully" - else - log_error "Failed to copy file" - exit 1 - fi - fi - - # Cleanup temp download - rm -f "$temp_download" - - # Validate JSON format - if jq . "$output_file" > /dev/null 2>&1; then - log_success "Downloaded report is valid JSON" - - # Log some basic info about the report - local report_info - if report_info=$(jq -r '.bomFormat // .spdxVersion // .reportType // "unknown"' "$output_file" 2>/dev/null); then - log_info "Report format detected: $report_info" - fi - - return 0 - else - log_error "Downloaded file is not valid JSON after processing" - log_error "Content preview:" - head -n 5 "$output_file" - exit 1 - fi - else - log_error "Downloaded file is empty or missing" - exit 1 - fi - else - log_error "Failed to download Wiz report from URL" - exit 1 - fi -} diff --git a/pkg/logger/logger.go b/pkg/logger/logger.go new file mode 100644 index 0000000..a7d1908 --- /dev/null +++ b/pkg/logger/logger.go @@ -0,0 +1,64 @@ +// Package logger provides a simple logging interface. +package logger + +import ( + "fmt" + "log" + "os" +) + +// Logger is a simple logger with different log levels. +type Logger struct { + debug bool +} + +var defaultLogger *Logger + +func init() { + defaultLogger = &Logger{ + debug: os.Getenv("DEBUG") == "true", + } +} + +// SetDebug enables or disables debug logging. +func SetDebug(debug bool) { + defaultLogger.debug = debug +} + +// Debug logs the message as debug information. +func Debug(format string, args ...interface{}) { + if defaultLogger.debug { + msg := fmt.Sprintf(format, args...) + log.Printf("\033[0;33m[DEBUG]\033[0m %s", msg) + } +} + +// Info logs the message as informational. +func Info(format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) + log.Printf("\033[0;34m[INFO]\033[0m %s", msg) +} + +// Success logs the message as a success. +func Success(format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) + log.Printf("\033[0;32m[SUCCESS]\033[0m %s", msg) +} + +// Warning logs the message as a warning. +func Warning(format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) 
+ log.Printf("\033[1;33m[WARNING]\033[0m %s", msg) +} + +// Error logs the message as an error. +func Error(format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) + log.Printf("\033[0;31m[ERROR]\033[0m %s", msg) +} + +// Fatal logs the message and exits the program. +func Fatal(format string, args ...interface{}) { + msg := fmt.Sprintf(format, args...) + log.Fatalf("\033[0;31m[ERROR]\033[0m %s", msg) +} diff --git a/run-tests.sh b/run-tests.sh deleted file mode 100755 index 31d4b53..0000000 --- a/run-tests.sh +++ /dev/null @@ -1,311 +0,0 @@ -#!/bin/bash - -# run-tests.sh - Enhanced test runner for your BATS tests - -set -euo pipefail - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -BLUE='\033[0;34m' -NC='\033[0m' # No Color - -# Print usage -usage() { - echo "Usage: $0 [OPTIONS] [TEST_FILE]" - echo "" - echo "Options:" - echo " -h, --help Show this help message" - echo " -v, --verbose Run tests with verbose output" - echo " -s, --simple Run only simple tests" - echo " -a, --advanced Run only advanced tests" - echo " -f, --filter Filter tests by pattern" - echo " --setup Check test setup and dependencies" - echo " --list List available test files" - echo "" - echo "Examples:" - echo " $0 # Run all tests" - echo " $0 -s # Run simple tests only" - echo " $0 -a # Run advanced tests only" - echo " $0 -v # Run with verbose output" - echo " $0 -f 'log_info' # Run tests matching 'log_info'" - echo " $0 test/simple.bats # Run specific test file" -} - -# Check if BATS is installed -check_bats() { - if ! command -v bats >/dev/null 2>&1; then - echo -e "${RED}โŒ BATS is not installed!${NC}" - echo "Run ./setup-bats.sh to install BATS first." - exit 1 - fi - - echo -e "${GREEN}โœ… BATS is installed:${NC} $(bats --version)" -} - -# Check if required dependencies are available -check_dependencies() { - echo -e "${BLUE}๐Ÿ” Checking dependencies...${NC}" - - local missing_deps=() - - # Check for jq (used by your script) - if ! command -v jq >/dev/null 2>&1; then - missing_deps+=("jq") - fi - - # Check for basic Unix tools - for tool in sed awk grep; do - if ! 
command -v "$tool" >/dev/null 2>&1; then - missing_deps+=("$tool") - fi - done - - if [[ ${#missing_deps[@]} -gt 0 ]]; then - echo -e "${YELLOW}โš ๏ธ Missing dependencies:${NC} ${missing_deps[*]}" - echo "Install them with your package manager (e.g., apt install jq)" - return 1 - else - echo -e "${GREEN}โœ… All dependencies available${NC}" - return 0 - fi -} - -# List available test files -list_tests() { - echo -e "${BLUE}๐Ÿ“‹ Available test files:${NC}" - - if [[ -d test ]]; then - local test_files - test_files=$(find test -name "*.bats" 2>/dev/null || true) - - if [[ -n "$test_files" ]]; then - while IFS= read -r file; do - local test_count - test_count=$(grep -c "^@test" "$file" 2>/dev/null || echo "0") - echo " ๐Ÿ“ $file ($test_count tests)" - - # Show test names - if [[ "$test_count" -gt 0 ]]; then - grep "^@test" "$file" | sed 's/@test / - /' | sed 's/ {$//' - fi - echo "" - done <<< "$test_files" - else - echo " (No test files found in test/ directory)" - fi - else - echo " (No test/ directory found)" - fi -} - -# Run setup checks -setup_check() { - echo -e "${BLUE}๐Ÿ”ง Checking test setup...${NC}" - - check_bats - check_dependencies - - # Check if entrypoint.sh exists and is readable - if [[ -f "entrypoint.sh" ]]; then - echo -e "${GREEN}โœ… entrypoint.sh found${NC}" - - # Check if it's executable - if [[ -x "entrypoint.sh" ]]; then - echo -e "${GREEN}โœ… entrypoint.sh is executable${NC}" - else - echo -e "${YELLOW}โš ๏ธ entrypoint.sh is not executable${NC}" - echo "Run: chmod +x entrypoint.sh" - fi - - # Basic syntax check - if bash -n entrypoint.sh; then - echo -e "${GREEN}โœ… entrypoint.sh syntax is valid${NC}" - else - echo -e "${RED}โŒ entrypoint.sh has syntax errors${NC}" - return 1 - fi - else - echo -e "${RED}โŒ entrypoint.sh not found${NC}" - echo "Make sure you're running this from the directory containing entrypoint.sh" - return 1 - fi - - # Check test directory - if [[ -d test ]]; then - echo -e "${GREEN}โœ… test/ directory found${NC}" - local bats_files - bats_files=$(find test -name "*.bats" | wc -l) - echo -e "${GREEN}โœ… Found $bats_files BATS test files${NC}" - else - echo -e "${YELLOW}โš ๏ธ test/ directory not found${NC}" - echo "Create it with: mkdir test" - fi - - echo -e "${GREEN}๐ŸŽ‰ Setup check complete!${NC}" -} - -# Run BATS tests with options -run_bats() { - local bats_args=() - local test_files=() - - # Add verbose flag if requested - if [[ "${VERBOSE:-false}" == "true" ]]; then - bats_args+=("--verbose-run") - fi - - # Add filter if provided - if [[ -n "${FILTER:-}" ]]; then - bats_args+=("--filter" "$FILTER") - fi - - # Determine which tests to run - if [[ "${SIMPLE_ONLY:-false}" == "true" ]]; then - test_files=("test/simple.bats") - elif [[ "${ADVANCED_ONLY:-false}" == "true" ]]; then - test_files=("test/advanced.bats") - elif [[ -n "${SPECIFIC_FILE:-}" ]]; then - test_files=("$SPECIFIC_FILE") - else - # Run all test files - if [[ -d test ]]; then - mapfile -t test_files < <(find test -name "*.bats" | sort) - fi - fi - - # Check if we have any test files - if [[ ${#test_files[@]} -eq 0 ]]; then - echo -e "${YELLOW}โš ๏ธ No test files found to run${NC}" - echo "Available options:" - echo " - Create test/simple.bats for simple tests" - echo " - Create test/advanced.bats for advanced tests" - echo " - Run ./setup-bats.sh to set up basic test structure" - return 1 - fi - - # Run the tests - echo -e "${BLUE}๐Ÿงช Running BATS tests...${NC}" - echo "Test files: ${test_files[*]}" - echo "BATS args: ${bats_args[*]}" - echo "" - - local exit_code=0 - 
local failed_files=() - - for test_file in "${test_files[@]}"; do - if [[ -f "$test_file" ]]; then - echo -e "${BLUE}๐Ÿ“ Running $test_file...${NC}" - if bats "${bats_args[@]}" "$test_file"; then - echo -e "${GREEN}โœ… $test_file passed${NC}" - else - echo -e "${RED}โŒ $test_file failed${NC}" - failed_files+=("$test_file") - exit_code=1 - fi - echo "" - else - echo -e "${YELLOW}โš ๏ธ Test file not found: $test_file${NC}" - exit_code=1 - fi - done - - # Summary - if [[ $exit_code -eq 0 ]]; then - echo -e "${GREEN}๐ŸŽ‰ All tests passed!${NC}" - else - echo -e "${RED}๐Ÿ’ฅ Some tests failed:${NC}" - for file in "${failed_files[@]}"; do - echo " - $file" - done - echo "" - echo -e "${YELLOW}๐Ÿ’ก Debugging tips:${NC}" - echo " - Run with -v for verbose output" - echo " - Check test setup with --setup" - echo " - Run individual test files to isolate issues" - fi - - return $exit_code -} - -# Parse command line arguments -VERBOSE=false -SIMPLE_ONLY=false -ADVANCED_ONLY=false -FILTER="" -SPECIFIC_FILE="" -SETUP_ONLY=false -LIST_ONLY=false - -echo "Starting argument parsing with $# arguments: $*" - -while [[ $# -gt 0 ]]; do - echo "Processing argument: '$1'" - case $1 in - -h|--help) - echo "Help requested" - usage - exit 0 - ;; - -v|--verbose) - echo "Setting VERBOSE=true" - VERBOSE=true - shift - ;; - -s|--simple) - echo "Setting SIMPLE_ONLY=true" - SIMPLE_ONLY=true - shift - ;; - -a|--advanced) - echo "Setting ADVANCED_ONLY=true" - ADVANCED_ONLY=true - shift - ;; - -f|--filter) - echo "Setting FILTER=$2" - FILTER="$2" - shift 2 - ;; - --setup) - echo "Setting SETUP_ONLY=true" - SETUP_ONLY=true - shift - ;; - --list) - echo "Setting LIST_ONLY=true" - LIST_ONLY=true - shift - ;; - *.bats) - echo "Setting SPECIFIC_FILE=$1" - SPECIFIC_FILE="$1" - shift - ;; - *) - echo "Unknown option: $1" - usage - exit 1 - ;; - esac -done - -# Handle special modes first -if [[ "$SETUP_ONLY" == "true" ]]; then - echo "Running setup check..." - setup_check - exit $? -fi - -if [[ "$LIST_ONLY" == "true" ]]; then - echo "Listing tests..." - list_tests - exit 0 -fi - -# Check basic setup first -check_bats - -# Run the tests -run_bats \ No newline at end of file diff --git a/setup-bats.sh b/setup-bats.sh deleted file mode 100755 index 4e8a651..0000000 --- a/setup-bats.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -# setup-bats.sh - Install and configure BATS for testing - -set -euo pipefail - -# Colors for output -GREEN='\033[0;32m' -BLUE='\033[0;34m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -echo -e "${BLUE}Setting up BATS testing framework...${NC}" - -# Create test directory structure -mkdir -p test - -# Check if bats is already installed -if command -v bats >/dev/null 2>&1; then - echo -e "${GREEN}BATS is already installed!${NC}" - bats --version -else - echo -e "${YELLOW}Installing BATS...${NC}" - - # Install BATS based on the operating system - if [[ "$OSTYPE" == "linux-gnu"* ]]; then - # Linux - try different package managers - if command -v apt-get >/dev/null 2>&1; then - echo "Installing via apt..." - sudo apt-get update && sudo apt-get install -y bats - elif command -v yum >/dev/null 2>&1; then - echo "Installing via yum..." - sudo yum install -y bats - elif command -v dnf >/dev/null 2>&1; then - echo "Installing via dnf..." - sudo dnf install -y bats - else - echo "Installing from source..." 
- git clone https://github.com/bats-core/bats-core.git /tmp/bats-core - cd /tmp/bats-core - sudo ./install.sh /usr/local - cd - - rm -rf /tmp/bats-core - fi - elif [[ "$OSTYPE" == "darwin"* ]]; then - # macOS - if command -v brew >/dev/null 2>&1; then - echo "Installing via Homebrew..." - brew install bats-core - else - echo "Please install Homebrew first, then run: brew install bats-core" - exit 1 - fi - else - echo "Unsupported OS. Please install BATS manually." - echo "Visit: https://github.com/bats-core/bats-core" - exit 1 - fi -fi - -# Verify installation -if command -v bats >/dev/null 2>&1; then - echo -e "${GREEN}BATS installation verified!${NC}" - bats --version -else - echo "BATS installation failed. Please install manually." - exit 1 -fi - -# Create a basic test file if it doesn't exist -if [[ ! -f "test/entrypoint.bats" ]]; then - echo -e "${BLUE}Creating basic test file...${NC}" - # The test file content would go here, but since we already created it above, - # we'll just create a placeholder or copy the content - cat > test/entrypoint.bats << 'EOF' -#!/usr/bin/env bats - -# Basic test to verify BATS is working -@test "basic test - addition" { - result="$((2 + 2))" - [ "$result" -eq 4 ] -} - -# Add more tests here... -EOF -fi - -# Create a test runner script only if it doesn't exist -if [[ ! -f "run-tests.sh" ]]; then - # Create a test runner script - cat > run-tests.sh << 'EOF' -#!/bin/bash - -# run-tests.sh - Test runner script - -set -euo pipefail - -echo "Running BATS tests..." - -# Run all tests in the test directory -if bats test/*.bats; then - echo "โœ… All tests passed!" -else - echo "โŒ Some tests failed!" - exit 1 -fi -EOF - - chmod +x run-tests.sh -else - echo -e "${YELLOW}Test runner script already exists: run-tests.sh${NC}" -fi diff --git a/test/advanced.bats b/test/advanced.bats deleted file mode 100644 index 2616343..0000000 --- a/test/advanced.bats +++ /dev/null @@ -1,2326 +0,0 @@ -#!/usr/bin/env bats - -# test/advanced_tests.bats -# Advanced BATS tests for entrypoint.sh - -# Setup function runs before each test -setup() { - # Get the directory where this test is located - export BATS_TEST_DIRNAME="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)" - export PROJECT_ROOT="$(dirname "$BATS_TEST_DIRNAME")" - - # Create a temporary test script that sources functions without executing main - export TEST_SCRIPT="$BATS_TEST_TMPDIR/test_entrypoint.sh" - - # Extract only the functions from entrypoint.sh (everything before main function call) - sed '/^# Run main function/,$d' "$PROJECT_ROOT/entrypoint.sh" > "$TEST_SCRIPT" - - # Replace the source line in the extracted script - sed -i "s|source \"\$SCRIPT_DIR/lib/sanitize.sh\"|source \"$PROJECT_ROOT/lib/sanitize.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/common.sh\"|source \"$PROJECT_ROOT/lib/common.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/validation.sh\"|source \"$PROJECT_ROOT/lib/validation.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/github.sh\"|source \"$PROJECT_ROOT/lib/github.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/mend.sh\"|source \"$PROJECT_ROOT/lib/mend.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/wiz.sh\"|source \"$PROJECT_ROOT/lib/wiz.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/sbom-processing.sh\"|source \"$PROJECT_ROOT/lib/sbom-processing.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/sbom-merging.sh\"|source \"$PROJECT_ROOT/lib/sbom-merging.sh\"|" "$TEST_SCRIPT" - - # Source the functions - source 
"$TEST_SCRIPT" - - # Create a temporary directory for this test session - # BATS_TEST_TMPDIR is provided by BATS automatically - export TEST_TEMP_DIR="$BATS_TEST_TMPDIR" - - # Set up basic required environment variables - export AWS_ACCESS_KEY_ID="test-key" - export AWS_SECRET_ACCESS_KEY="test-secret" - export S3_BUCKET="test-bucket" - export REPOSITORY="test-owner/test-repo" - export GITHUB_TOKEN="test-token" - - # Create mock directory in PATH (for mocking external commands) - export MOCK_DIR="$BATS_TEST_TMPDIR/mocks" - mkdir -p "$MOCK_DIR" - - # Prepend mock directory to PATH so our mocks are found first - export PATH="$MOCK_DIR:$PATH" -} - -# Teardown function runs after each test -teardown() { - # Clean up environment variables - unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY S3_BUCKET REPOSITORY GITHUB_TOKEN - unset TEST_TEMP_DIR MOCK_DIR - - # BATS automatically cleans up BATS_TEST_TMPDIR, but we can do extra cleanup if needed -} - -# ============================================================================ -# TESTS WITH TEMPORARY FILES -# ============================================================================ - -# Test 1: detect_sbom_format with a temporary CycloneDX SBOM file -@test "detect_sbom_format works with temporary CycloneDX file" { - # Create a temporary CycloneDX SBOM file - local test_sbom="$TEST_TEMP_DIR/cyclonedx_test.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "serialNumber": "urn:uuid:test-123", - "metadata": { - "component": { - "name": "test-component", - "version": "1.0.0" - } - }, - "components": [] -} -EOF - - # Verify the file was created - [ -f "$test_sbom" ] - [ -s "$test_sbom" ] # File exists and is not empty - - # Test the function - run detect_sbom_format "$test_sbom" - - [ "$status" -eq 0 ] - [ "$output" = "cyclonedx" ] -} - -# Test 2: detect_sbom_format with a temporary SPDX SBOM file -@test "detect_sbom_format works with temporary SPDX file" { - # Create a temporary SPDX SBOM file - local test_sbom="$TEST_TEMP_DIR/spdx_test.json" - - cat > "$test_sbom" << 'EOF' -{ - "spdxVersion": "SPDX-2.2", - "SPDXID": "SPDXRef-DOCUMENT", - "name": "test-document", - "documentNamespace": "https://example.com/test", - "packages": [ - { - "SPDXID": "SPDXRef-Package", - "name": "test-package", - "versionInfo": "1.0.0" - } - ] -} -EOF - - # Verify the file was created correctly - [ -f "$test_sbom" ] - [ -s "$test_sbom" ] - - # Verify it's valid JSON - run jq . 
"$test_sbom" - [ "$status" -eq 0 ] - - # Test the function - run detect_sbom_format "$test_sbom" - - [ "$status" -eq 0 ] - [ "$output" = "spdxjson" ] -} - -# Test 3: extract_sbom_from_wrapper with a temporary wrapped SBOM file -@test "extract_sbom_from_wrapper handles wrapped SBOM" { - # Create a wrapped SBOM file - local wrapped_sbom="$TEST_TEMP_DIR/wrapped_sbom.json" - local extracted_sbom="$TEST_TEMP_DIR/extracted_sbom.json" - - cat > "$wrapped_sbom" << 'EOF' -{ - "status": "success", - "sbom": { - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "test-component", - "version": "1.0.0" - } - ] - } -} -EOF - - # Test the extraction function - run extract_sbom_from_wrapper "$wrapped_sbom" "$extracted_sbom" - - [ "$status" -eq 0 ] - [ -f "$extracted_sbom" ] - - # Verify the extracted content is correct - local extracted_format - extracted_format=$(jq -r '.bomFormat' "$extracted_sbom") - [ "$extracted_format" = "CycloneDX" ] - - # Verify the wrapper properties are gone - run jq -e '.status' "$extracted_sbom" - [ "$status" -ne 0 ] # Should fail because .status shouldn't exist in extracted file -} - -# Test 4: extract_sbom_from_wrapper handles unwrapped SBOM -@test "extract_sbom_from_wrapper handles non-wrapped SBOM" { - # Create a non-wrapped SBOM file - local normal_sbom="$TEST_TEMP_DIR/normal_sbom.json" - local output_sbom="$TEST_TEMP_DIR/output_sbom.json" - - cat > "$normal_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [] -} -EOF - - # Test the extraction function (should just copy the file) - run extract_sbom_from_wrapper "$normal_sbom" "$output_sbom" - - [ "$status" -eq 0 ] - [ -f "$output_sbom" ] - - # Files should be identical - run diff "$normal_sbom" "$output_sbom" - [ "$status" -eq 0 ] -} - -# ============================================================================ -# TESTS WITH COMMAND MOCKING -# ============================================================================ - -# Test 5: upload_to_s3 with mocked aws command -@test "upload_to_s3 calls aws s3 cp with correct parameters" { - # Create a mock aws command that logs what it was called with - cat > "$MOCK_DIR/aws" << 'EOF' -#!/bin/bash -# Mock aws command - just log the arguments and succeed -echo "aws called with: $*" >> "$BATS_TEST_TMPDIR/aws_calls.log" -exit 0 -EOF - chmod +x "$MOCK_DIR/aws" - - # Create a test file to upload - local test_file="$TEST_TEMP_DIR/test_sbom.json" - echo '{"bomFormat": "CycloneDX"}' > "$test_file" - - # Test the upload function - run upload_to_s3 "$test_file" "my-bucket" "path/to/sbom.json" - - [ "$status" -eq 0 ] - - # Verify aws was called with correct parameters - [ -f "$BATS_TEST_TMPDIR/aws_calls.log" ] - local aws_call - aws_call=$(cat "$BATS_TEST_TMPDIR/aws_calls.log") - - [[ "$aws_call" == *"s3 cp"* ]] - [[ "$aws_call" == *"$test_file"* ]] - [[ "$aws_call" == *"s3://my-bucket/path/to/sbom.json"* ]] - [[ "$aws_call" == *"--content-type"* ]] - [[ "$aws_call" == *"application/json"* ]] -} - -# Test 6: upload_to_s3 handles aws command failure -@test "upload_to_s3 handles aws command failure" { - # Create a mock aws command that fails - cat > "$MOCK_DIR/aws" << 'EOF' -#!/bin/bash -echo "AWS Error: Access denied" >&2 -exit 1 -EOF - chmod +x "$MOCK_DIR/aws" - - # Create a test file - local test_file="$TEST_TEMP_DIR/test_sbom.json" - echo '{"bomFormat": "CycloneDX"}' > "$test_file" - - # Test the upload function - should fail - run upload_to_s3 "$test_file" "my-bucket" "path/to/sbom.json" - - [ "$status" -eq 1 ] - [[ 
"$output" == *"Failed to upload SBOM to S3"* ]] -} - -# Test 7: download_sbom with mocked curl command -@test "download_sbom calls curl with correct GitHub API parameters" { - # Create a mock curl command - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -# Mock curl command - log the call and return fake SBOM data -echo "curl called with: $*" >> "$BATS_TEST_TMPDIR/curl_calls.log" - -# Check if this is the GitHub API call we expect -if [[ "$*" == *"api.github.com/repos"* ]] && [[ "$*" == *"dependency-graph/sbom"* ]]; then - # Find the output file from the arguments - local output_file="" - local next_is_output=false - for arg in "$@"; do - if [[ "$next_is_output" == "true" ]]; then - output_file="$arg" - break - fi - if [[ "$arg" == "-o" ]]; then - next_is_output=true - fi - done - - # Write fake SBOM data to the output file - if [[ -n "$output_file" ]]; then - cat > "$output_file" << 'SBOM_EOF' -{ - "sbom": { - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "test-component", - "version": "1.0.0" - } - ] - } -} -SBOM_EOF - fi - exit 0 -else - exit 1 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test the download function - local output_file="$TEST_TEMP_DIR/downloaded_sbom.json" - run download_sbom "owner/repo" "$output_file" - - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Verify curl was called correctly - [ -f "$BATS_TEST_TMPDIR/curl_calls.log" ] - local curl_call - curl_call=$(cat "$BATS_TEST_TMPDIR/curl_calls.log") - - [[ "$curl_call" == *"api.github.com/repos/owner/repo/dependency-graph/sbom"* ]] - [[ "$curl_call" == *"Authorization: Bearer $GITHUB_TOKEN"* ]] - [[ "$curl_call" == *"-o $output_file"* ]] - - # Verify the downloaded file is valid JSON - run jq . "$output_file" - [ "$status" -eq 0 ] -} - -# Test 8: download_sbom handles curl failure -@test "download_sbom handles curl failure" { - # Create a mock curl command that fails - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "curl: (7) Failed to connect to api.github.com" >&2 -exit 7 -EOF - chmod +x "$MOCK_DIR/curl" - - # Test the download function - should fail - local output_file="$TEST_TEMP_DIR/failed_download.json" - run download_sbom "owner/repo" "$output_file" - - [ "$status" -eq 1 ] - [[ "$output" == *"Failed to download SBOM file"* ]] -} - -# Test 9: mock jq command for testing JSON processing -@test "mock jq command for testing JSON processing" { - # Create a mock jq that returns predictable output - cat > "$MOCK_DIR/jq" << 'EOF' -#!/bin/bash -# Mock jq command -echo "jq called with: $*" >> "$BATS_TEST_TMPDIR/jq_calls.log" - -# Handle different jq operations -case "$*" in - *".bomFormat"*) - echo "CycloneDX" - ;; - *".spdxVersion"*) - echo "SPDX-2.2" - ;; - *". 
| empty"*) - # JSON validation - just succeed - exit 0 - ;; - *) - # Default - just succeed - exit 0 - ;; -esac -EOF - chmod +x "$MOCK_DIR/jq" - - # Create a test file - local test_file="$TEST_TEMP_DIR/test.json" - echo '{"bomFormat": "CycloneDX"}' > "$test_file" - - # Test detect_sbom_format with our mock - run detect_sbom_format "$test_file" - - [ "$status" -eq 0 ] - [ "$output" = "cyclonedx" ] - - # Verify jq was called - [ -f "$BATS_TEST_TMPDIR/jq_calls.log" ] - local jq_call - jq_call=$(cat "$BATS_TEST_TMPDIR/jq_calls.log") - [[ "$jq_call" == *".bomFormat"* ]] -} - -# ============================================================================ -# COMPLEX SCENARIOS - COMBINING MOCKING AND TEMP FILES -# ============================================================================ - -# Test 10: full workflow simulation with mocks and temp files -@test "full workflow simulation with mocks and temp files" { - # Set up multiple mocks - - # Mock curl for downloading - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -if [[ "$*" == *"dependency-graph/sbom"* ]]; then - # Find output file - local output_file="" - local next_is_output=false - for arg in "$@"; do - if [[ "$next_is_output" == "true" ]]; then - output_file="$arg" - break - fi - if [[ "$arg" == "-o" ]]; then - next_is_output=true - fi - done - - # Create a realistic wrapped SBOM - cat > "$output_file" << 'SBOM_EOF' -{ - "sbom": { - "spdxVersion": "SPDX-2.2", - "SPDXID": "SPDXRef-DOCUMENT", - "name": "test-repo", - "packages": [ - { - "SPDXID": "SPDXRef-Package-test", - "name": "lodash", - "versionInfo": "4.17.21", - "licenseConcluded": "MIT" - } - ] - } -} -SBOM_EOF - exit 0 -fi -exit 1 -EOF - chmod +x "$MOCK_DIR/curl" - - # Mock cyclonedx convert command - cat > "$MOCK_DIR/cyclonedx" << 'EOF' -#!/bin/bash -# Mock cyclonedx convert -echo "cyclonedx called with: $*" >> "$BATS_TEST_TMPDIR/cyclonedx_calls.log" - -# Find input and output files - handle --flag value format -input_file="" -output_file="" -i=1 -while [[ $i -le $# ]]; do - case "${!i}" in - --input-file) - ((i++)) - input_file="${!i}" - ;; - --output-file) - ((i++)) - output_file="${!i}" - ;; - esac - ((i++)) -done - -echo "Mock cyclonedx: input=$input_file, output=$output_file" >> "$BATS_TEST_TMPDIR/cyclonedx_calls.log" - -# Convert SPDX to CycloneDX (simplified simulation) -if [[ -n "$output_file" ]]; then - cat > "$output_file" << 'CONVERTED_EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "lodash", - "version": "4.17.21", - "licenses": [ - { - "license": { - "id": "MIT" - } - } - ] - } - ] -} -CONVERTED_EOF - echo "Mock cyclonedx: Created output file $output_file" >> "$BATS_TEST_TMPDIR/cyclonedx_calls.log" -else - echo "Mock cyclonedx: No output file specified!" 
>> "$BATS_TEST_TMPDIR/cyclonedx_calls.log" - exit 1 -fi -exit 0 -EOF - chmod +x "$MOCK_DIR/cyclonedx" - - # Mock aws s3 cp - cat > "$MOCK_DIR/aws" << 'EOF' -#!/bin/bash -echo "aws s3 cp successful" >> "$BATS_TEST_TMPDIR/aws_calls.log" -exit 0 -EOF - chmod +x "$MOCK_DIR/aws" - - # Set up test environment - export SBOM_FORMAT="cyclonedx" - export SBOM_SOURCE="github" - - # Create temporary files for the workflow - local original_sbom="$TEST_TEMP_DIR/original.json" - local extracted_sbom="$TEST_TEMP_DIR/extracted.json" - local converted_sbom="$TEST_TEMP_DIR/converted.json" - - # Test the workflow steps - - # Step 1: Download SBOM - run download_sbom "test/repo" "$original_sbom" - [ "$status" -eq 0 ] - [ -f "$original_sbom" ] - - # Step 2: Extract from wrapper - run extract_sbom_from_wrapper "$original_sbom" "$extracted_sbom" - [ "$status" -eq 0 ] - [ -f "$extracted_sbom" ] - - # Step 3: Detect format - run detect_sbom_format "$extracted_sbom" - [ "$status" -eq 0 ] - [ "$output" = "spdxjson" ] - - # Step 4: Convert format - run convert_sbom "$extracted_sbom" "$converted_sbom" "spdxjson" "cyclonedx" - [ "$status" -eq 0 ] - [ -f "$converted_sbom" ] - - # Step 5: Upload to S3 - run upload_to_s3 "$converted_sbom" "test-bucket" "test-key.json" - [ "$status" -eq 0 ] - - # Verify all our mocks were called - [ -f "$BATS_TEST_TMPDIR/cyclonedx_calls.log" ] - [ -f "$BATS_TEST_TMPDIR/aws_calls.log" ] - - # Verify final file format - local final_format - # Use real jq here since we want to actually check the file - final_format=$(jq -r '.bomFormat' "$converted_sbom") - [ "$final_format" = "CycloneDX" ] -} - -# ============================================================================ -# SANITIZE_INPUTS INTEGRATION TESTS -# ============================================================================ - -# Test 11: sanitize_inputs processes repository correctly -@test "sanitize_inputs processes repository correctly" { - export REPOSITORY="test-org/test-repo" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 12: sanitize_inputs processes Mend email correctly -@test "sanitize_inputs processes Mend email correctly" { - export SBOM_SOURCE="mend" - export MEND_EMAIL="test@example.com" - export MEND_ORG_UUID="123e4567-e89b-12d3-a456-426614174000" - export MEND_USER_KEY="test-key" - export MEND_BASE_URL="https://api.mend.io" - export MEND_PROJECT_UUID="123e4567-e89b-12d3-a456-426614174000" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 13: sanitize_inputs processes S3 bucket correctly -@test "sanitize_inputs processes S3 bucket correctly" { - export S3_BUCKET="My-Test-Bucket" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 14: sanitize_inputs validates SBOM_SOURCE enum -@test "sanitize_inputs validates SBOM_SOURCE enum" { - export SBOM_SOURCE="invalid-source" - - run sanitize_inputs - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid SBOM_SOURCE: invalid-source"* ]] -} - -# Test 15: sanitize_inputs validates SBOM_FORMAT enum -@test "sanitize_inputs validates SBOM_FORMAT enum" { - export SBOM_FORMAT="invalid-format" - - run sanitize_inputs - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid SBOM_FORMAT: invalid-format"* ]] -} - -# Test 16: sanitize_inputs validates MERGE boolean -@test "sanitize_inputs validates MERGE boolean" { - export MERGE="maybe" - - run sanitize_inputs - [ "$status" 
-eq 1 ] - [[ "$output" == *"Invalid MERGE value: maybe"* ]] -} - -# Test 17: sanitize_inputs processes include patterns correctly -@test "sanitize_inputs processes include patterns correctly" { - export INCLUDE=" *.json , test*.txt , file.log " - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 18: sanitize_inputs processes exclude patterns correctly -@test "sanitize_inputs processes exclude patterns correctly" { - export EXCLUDE="*-dev.json,*-test.json" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 19: sanitize_inputs processes ClickHouse URL correctly -@test "sanitize_inputs processes ClickHouse URL correctly" { - export CLICKHOUSE_URL="https://clickhouse.example.com:8443" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 20: sanitize_inputs processes multiple Mend project UUIDs -@test "sanitize_inputs processes multiple Mend project UUIDs" { - export MEND_PROJECT_UUIDS="123e4567-e89b-12d3-a456-426614174000, 456e7890-e89b-12d3-a456-426614174000" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 21: sanitize_inputs processes numeric values with validation -@test "sanitize_inputs processes numeric values with validation" { - export MEND_MAX_WAIT_TIME="1800" - export MEND_POLL_INTERVAL="30" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 22: sanitize_inputs rejects invalid numeric values -@test "sanitize_inputs rejects invalid numeric values" { - export MEND_MAX_WAIT_TIME="8000" # Too high - run sanitize_inputs - [ "$status" -eq 1 ] -} - -# Test 23: Diagnostic test for range checking -@test "diagnostic test for range checking" { - export MEND_MAX_WAIT_TIME="8000" # Too high (max is 7200) - export DEBUG="true" # Enable debug output - - run sanitize_inputs - - # This should fail if range checking works - [ "$status" -eq 1 ] - [[ "$output" == *"out of range"* ]] -} - -# Test 24: Test with valid value to ensure function works -@test "sanitize_inputs accepts valid numeric values" { - export MEND_MAX_WAIT_TIME="1800" # Valid (within 60-7200 range) - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 25: Test with value below minimum -@test "sanitize_inputs rejects value below minimum" { - export MEND_MAX_WAIT_TIME="30" # Too low (min is 60) - - run sanitize_inputs - [ "$status" -eq 1 ] - [[ "$output" == *"out of range"* ]] -} - -# Test 26: sanitize_inputs skips empty values -@test "sanitize_inputs skips empty values" { - export REPOSITORY="" - export MEND_EMAIL="" - - run sanitize_inputs - [ "$status" -eq 0 ] - # Should not contain any sanitization messages for empty values - [[ "$output" != *"Sanitized REPOSITORY:"* ]] - [[ "$output" != *"Sanitized MEND_EMAIL:"* ]] -} - -# Test 27: sanitize_inputs redacts sensitive information in logs -@test "sanitize_inputs redacts sensitive information in logs" { - export GITHUB_TOKEN="secret-token" - export AWS_ACCESS_KEY_ID="secret-key" - export AWS_SECRET_ACCESS_KEY="secret-access-key" - export CLICKHOUSE_PASSWORD="secret-password" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] - - # Make sure actual values are not in the output - [[ "$output" 
!= *"secret-token"* ]] - [[ "$output" != *"secret-key"* ]] - [[ "$output" != *"secret-access-key"* ]] - [[ "$output" != *"secret-password"* ]] -} - -# ============================================================================ -# SECURITY ATTACK VECTOR TESTS -# ============================================================================ - -# Test 28: sanitize_string prevents command injection via backticks -@test "sanitize_string prevents command injection via backticks" { - run sanitize_string "normal\`rm -rf /\`text" - [ "$status" -eq 0 ] - [[ "$output" == "normalrm -rf /text" ]] - # Should not contain backticks - [[ "$output" != *"\`"* ]] -} - -# Test 29: sanitize_string prevents command injection via dollar parentheses -@test "sanitize_string prevents command injection via dollar parentheses" { - run sanitize_string "normal\$(rm -rf /)text" - [ "$status" -eq 0 ] - [[ "$output" == "normalrm -rf /text" ]] - # Should not contain $( or ) - [[ "$output" != *"\$("* ]] - [[ "$output" != *")"* ]] -} - -# Test 30: sanitize_string prevents pipe injection -@test "sanitize_string prevents pipe injection" { - run sanitize_string "normal|rm -rf /|text" - [ "$status" -eq 0 ] - [[ "$output" == "normalrm -rf /text" ]] - # Should not contain pipes - [[ "$output" != *"|"* ]] -} - -# Test 31: sanitize_string prevents semicolon command chaining -@test "sanitize_string prevents semicolon command chaining" { - run sanitize_string "normal;rm -rf /;text" - [ "$status" -eq 0 ] - [[ "$output" == "normalrm -rf /text" ]] - # Should not contain semicolons - [[ "$output" != *";"* ]] -} - -# Test 32: sanitize_string prevents ampersand backgrounding -@test "sanitize_string prevents ampersand backgrounding" { - run sanitize_string "normal&rm -rf /&text" - [ "$status" -eq 0 ] - [[ "$output" == "normalrm -rf /text" ]] - # Should not contain ampersands - [[ "$output" != *"&"* ]] -} - -# Test 33: sanitize_string prevents redirection attacks -@test "sanitize_string prevents redirection attacks" { - run sanitize_string "normal>>/etc/passwd<"* ]] - [[ "$output" != *"<"* ]] -} - -# Test 34: sanitize_repository prevents path traversal in repository names -@test "sanitize_repository prevents path traversal in repository names" { - run sanitize_repository "../../../etc/passwd" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] -} - -# Test 35: sanitize_repository prevents null byte injection -@test "sanitize_repository prevents null byte injection" { - local test_repo=$(printf "owner/repo\000malicious") - run sanitize_repository "$test_repo" - [ "$status" -eq 0 ] - [[ "$output" == "owner/repomalicious" ]] -} - -# Test 36: sanitize_url prevents javascript protocol injection -@test "sanitize_url prevents javascript protocol injection" { - run sanitize_url "javascript:alert('xss')" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 37: sanitize_url prevents data URL injection -@test "sanitize_url prevents data URL injection" { - run sanitize_url "data:text/html," - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 38: sanitize_url prevents file protocol access -@test "sanitize_url prevents file protocol access" { - run sanitize_url "file:///etc/passwd" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 39: sanitize_s3_key prevents directory traversal -@test "sanitize_s3_key prevents directory traversal" { - run sanitize_s3_key "../../../../etc/passwd" - [ "$status" -eq 0 ] - [[ "$output" == "etc/passwd" ]] - # Should not contain 
../ sequences - [[ "$output" != *".."* ]] -} - -# Test 40: sanitize_s3_key prevents null byte injection -@test "sanitize_s3_key prevents null byte file injection" { - local test_key=$(printf "file.json\000.sh") - run sanitize_s3_key "$test_key" - [ "$status" -eq 0 ] - [[ "$output" == "file.json.sh" ]] -} - -# Test 41: sanitize_email prevents email header injection -@test "sanitize_email prevents header injection" { - run sanitize_email "user@example.com\nBcc: admin@evil.com" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 42: sanitize_email prevents SQL injection attempts -@test "sanitize_database_name prevents SQL injection attempts" { - run sanitize_database_name "test'; DROP TABLE users; --" - [ "$status" -eq 0 ] - [[ "$output" == "testDROPTABLEusers" ]] -} - -# ============================================================================ -# UNICODE AND ENCODING EDGE CASES -# ============================================================================ - -# Test 43: sanitize_string handles unicode characters -@test "sanitize_string handles unicode characters" { - run sanitize_string "test-รผรฑรญรงรธdรฉ-string" - [ "$status" -eq 0 ] - # Should remove non-ASCII characters - [[ "$output" == "test-d-string" ]] -} - -# Test 44: sanitize_string handles mixed encoding -@test "sanitize_string handles mixed encoding" { - # Test with mixed ASCII and control characters - local mixed_string=$(printf "test\x1b[31mred\x1b[0mnormal") - run sanitize_string "$mixed_string" - [ "$status" -eq 0 ] - [[ "$output" == "test31mred0mnormal" ]] -} - -# Test 45: sanitize_repository handles locales with special characters -@test "sanitize_repository handles locales with special characters" { - # Note: This should fail validation as our regex is ASCII-only - run sanitize_repository "รผser/repรถ" - [ "$status" -eq 0 ] - [[ "$output" == "ser/rep" ]] -} - -# Test 46: sanitize_url handles internationalized domain names -@test "sanitize_url handles internationalized domain names" { - # Test with punycode (internationalized domain) - run sanitize_url "https://xn--n3h.com" - [ "$status" -eq 0 ] - [[ "$output" == "https://xn--n3h.com" ]] -} - -# Test 47: sanitize_email handles unicode in email addresses -@test "sanitize_email handles unicode in email addresses" { - # Should remove unicode characters - run sanitize_email "รผser@example.com" - [ "$status" -eq 0 ] - [[ "$output" == "ser@example.com" ]] -} - -# ============================================================================ -# BOUNDARY CONDITION TESTS -# ============================================================================ - -# Test 48: sanitize_string handles empty string -@test "sanitize_string handles empty string" { - run sanitize_string "" - [ "$status" -eq 0 ] - [[ "$output" == "" ]] -} - -# Test 49: sanitize_string handles very long string -@test "sanitize_string handles very long string" { - local long_string=$(printf 'a%.0s' {1..10000}) - run sanitize_string "$long_string" 1000 - [ "$status" -eq 0 ] - [ "${#output}" -eq 1000 ] - [[ "$output" == "$(printf 'a%.0s' {1..1000})" ]] -} - -# Test 50: sanitize_string handles string with only dangerous characters -@test "sanitize_string handles string with only dangerous characters" { - run sanitize_string "\$\`(){}|;&<>" - [ "$status" -eq 0 ] - [[ "$output" == "" ]] -} - -# Test 51: sanitize_repository handles minimum valid length -@test "sanitize_repository handles minimum valid length" { - run sanitize_repository "a/b" - [ "$status" -eq 0 ] - [[ "$output" == "a/b" ]] -} - 
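Taken together, the sanitize_repository tests in this deleted suite pin down the expected behaviour: strip characters outside a small allowed set, then require an owner/name shape, rejecting path traversal, double slashes, and trailing slashes. A short Go sketch of an equivalent check, assuming a hypothetical SanitizeRepository helper rather than the real internal/validation/sanitize.go implementation:

package main

import (
	"fmt"
	"regexp"
)

var (
	// Characters outside this set are stripped, as in the bash sanitizer these tests exercise.
	repoDisallowed = regexp.MustCompile(`[^A-Za-z0-9._/-]`)
	// The cleaned value must be exactly one non-empty owner and one non-empty name.
	repoShape = regexp.MustCompile(`^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$`)
)

// SanitizeRepository strips disallowed characters and validates the owner/repo shape.
// Hypothetical helper for illustration; the error text mirrors the message the tests expect.
func SanitizeRepository(in string) (string, error) {
	cleaned := repoDisallowed.ReplaceAllString(in, "")
	if !repoShape.MatchString(cleaned) {
		return "", fmt.Errorf("Invalid repository format: %s", in)
	}
	return cleaned, nil
}

func main() {
	fmt.Println(SanitizeRepository("a/b"))                 // "a/b" <nil>
	fmt.Println(SanitizeRepository("../../../etc/passwd")) // "" Invalid repository format: ...
}

Inputs such as "owner//repo" or "owner/repo/" fail the shape check, matching the malformed-input tests later in this file.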
-# Test 52: sanitize_repository handles maximum practical length -@test "sanitize_repository handles maximum practical length" { - # GitHub has limits, but test with reasonable long names - local long_owner=$(printf 'a%.0s' {1..50}) - local long_repo=$(printf 'b%.0s' {1..50}) - run sanitize_repository "$long_owner/$long_repo" - [ "$status" -eq 0 ] - [[ "$output" == "$long_owner/$long_repo" ]] -} - -# Test 53: sanitize_s3_bucket handles minimum valid length -@test "sanitize_s3_bucket handles minimum valid length" { - run sanitize_s3_bucket "abc" - [ "$status" -eq 0 ] - [[ "$output" == "abc" ]] -} - -# Test 54: sanitize_s3_bucket handles maximum valid length -@test "sanitize_s3_bucket handles maximum valid length" { - local max_bucket=$(printf 'a%.0s' {1..63}) - run sanitize_s3_bucket "$max_bucket" - [ "$status" -eq 0 ] - [[ "$output" == "$max_bucket" ]] -} - -# Test 55: sanitize_numeric handles zero -@test "sanitize_numeric handles zero" { - run sanitize_numeric "0" "TEST_FIELD" - [ "$status" -eq 0 ] - [[ "$output" == "0" ]] -} - -# Test 56: sanitize_numeric handles leading zeros -@test "sanitize_numeric handles leading zeros" { - run sanitize_numeric "00123" "TEST_FIELD" - [ "$status" -eq 0 ] - [[ "$output" == "123" ]] -} - -# Test 57: sanitize_uuid handles minimum valid length -@test "sanitize_uuid handles minimum valid length" { - run sanitize_uuid "12345678" "TEST_UUID" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid UUID format"* ]] -} - -# ============================================================================ -# MALFORMED INPUT TESTS -# ============================================================================ - -# Test 58: sanitize_repository handles malformed repository - double slash -@test "sanitize_repository handles malformed repository - double slash" { - run sanitize_repository "owner//repo" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] -} - -# Test 59: sanitize_repository handles malformed repository - trailing slash -@test "sanitize_repository handles malformed repository - trailing slash" { - run sanitize_repository "owner/repo/" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] -} - -# Test 60: sanitize_url handles malformed URL - missing protocol -@test "sanitize_url handles malformed URL - missing protocol" { - run sanitize_url "example.com" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 61: sanitize_url handles malformed URL - double protocol -@test "sanitize_url handles malformed URL - double protocol" { - run sanitize_url "https://http://example.com" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 62: sanitize_email handles malformed email - double @ -@test "sanitize_email handles malformed email - double @" { - run sanitize_email "user@@example.com" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 63: sanitize_email handles malformed email - missing domain -@test "sanitize_email handles malformed email - missing domain" { - run sanitize_email "user@" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 64: sanitize_patterns handles malformed patterns - only commas -@test "sanitize_patterns handles malformed patterns - only commas" { - run sanitize_patterns ",,," - [ "$status" -eq 0 ] - [[ "$output" == "" ]] -} - -# Test 65: sanitize_patterns handles malformed patterns - mixed valid/invalid -@test "sanitize_patterns handles malformed patterns - mixed valid/invalid" { - run 
sanitize_patterns "*.json,\$\$\$,test*.txt" - [ "$status" -eq 0 ] - [[ "$output" == "*.json,test*.txt" ]] -} - -# ============================================================================ -# INTEGRATION TESTS WITH REALISTIC ATTACK SCENARIOS -# ============================================================================ - -# Test 66: sanitize_inputs handles comprehensive injection attempt -@test "sanitize_inputs handles comprehensive injection attempt" { - # Set up a comprehensive attack scenario - export REPOSITORY="evil\`rm -rf /\`/repo" - export MEND_EMAIL="evil@example.com; cat /etc/passwd" - export S3_BUCKET="evil-bucket\$(whoami)" - export S3_KEY="../../../etc/passwd" - export CLICKHOUSE_URL="https://evil.com/\`id\`" - export INCLUDE="*.json; rm -rf /" - export EXCLUDE="*.txt|cat /etc/passwd" - - run sanitize_inputs - [ "$status" -eq 0 ] # Handles sanitization without crashing - - # Check that dangerous characters were removed or validation failed - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 67: sanitize_inputs handles null byte injection across multiple fields -@test "sanitize_inputs handles null byte injection across multiple fields" { - # Test null byte injection in multiple fields - local null_repo=$(printf "owner/repo\000malicious") - local null_email=$(printf "user@example.com\000admin@evil.com") - local null_bucket=$(printf "bucket\000evil") - - export REPOSITORY="$null_repo" - export MEND_EMAIL="$null_email" - export S3_BUCKET="$null_bucket" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 68: sanitize_inputs handles control character injection -@test "sanitize_inputs handles control character injection" { - # Test various control characters - local control_string=$(printf "test\001\002\003\004\005string") - - export REPOSITORY="owner/repo" - export GITHUB_TOKEN="$control_string" - - run sanitize_inputs - [ "$status" -eq 0 ] - - # Control characters should be removed - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# Test 69: sanitize_inputs preserves valid complex inputs -@test "sanitize_inputs preserves valid complex inputs" { - # Test that valid complex inputs are preserved - export REPOSITORY="my-org/my-repo.name" - export MEND_EMAIL="user.name+tag@example-domain.co.uk" - export S3_BUCKET="my-test-bucket-123" - export S3_KEY="path/to/sbom-file.json" - export CLICKHOUSE_URL="https://clickhouse.example.com:8443" - export INCLUDE="*-prod.json,production-*.json" - export EXCLUDE="*-dev.json,*-test.json" - export MEND_PROJECT_UUIDS="123e4567-e89b-12d3-a456-426614174000,456e7890-e89b-12d3-a456-426614174001" - - run sanitize_inputs - [ "$status" -eq 0 ] - - # All valid inputs should be preserved - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# ============================================================================ -# PERFORMANCE AND RESOURCE TESTS -# ============================================================================ - -# Test 70: sanitize_string handles extremely long input efficiently -@test "sanitize_string handles extremely long input efficiently" { - # Test with very long input to ensure no performance issues - local huge_string=$(printf 'a%.0s' {1..50000}) - - run sanitize_string "$huge_string" 1000 - [ "$status" -eq 0 ] - [ "${#output}" -eq 1000 ] -} - -# Test 71: sanitize_patterns handles many patterns efficiently -@test "sanitize_patterns handles many patterns efficiently" { - # Test with many 
patterns - local many_patterns="" - for i in {1..100}; do - many_patterns+=",pattern$i*.json" - done - many_patterns=${many_patterns:1} # Remove leading comma - - run sanitize_patterns "$many_patterns" - [ "$status" -eq 0 ] - [[ "$output" == *"pattern1*.json"* ]] - [[ "$output" == *"pattern100*.json"* ]] -} - -# Test 72: sanitize_inputs handles all fields simultaneously -@test "sanitize_inputs handles all fields simultaneously" { - # Test with all possible fields set to ensure no conflicts - export REPOSITORY="owner/repo" - export MEND_EMAIL="user@example.com" - export MEND_ORG_UUID="123e4567-e89b-12d3-a456-426614174000" - export MEND_USER_KEY="test-key" - export MEND_BASE_URL="https://api.mend.io" - export MEND_PROJECT_UUID="123e4567-e89b-12d3-a456-426614174001" - export MEND_PRODUCT_UUID="123e4567-e89b-12d3-a456-426614174002" - export MEND_ORG_SCOPE_UUID="123e4567-e89b-12d3-a456-426614174003" - export MEND_PROJECT_UUIDS="123e4567-e89b-12d3-a456-426614174004,123e4567-e89b-12d3-a456-426614174005" - export MEND_MAX_WAIT_TIME="1800" - export MEND_POLL_INTERVAL="30" - export WIZ_AUTH_ENDPOINT="https://auth.wiz.io" - export WIZ_API_ENDPOINT="https://api.wiz.io" - export WIZ_CLIENT_ID="wiz-client-id" - export WIZ_CLIENT_SECRET="wiz-client-secret" - export WIZ_REPORT_ID="wiz-report-123" - export AWS_ACCESS_KEY_ID="aws-key" - export AWS_SECRET_ACCESS_KEY="aws-secret" - export AWS_DEFAULT_REGION="us-east-1" - export S3_BUCKET="test-bucket" - export S3_KEY="test/sbom.json" - export CLICKHOUSE_URL="https://clickhouse.example.com" - export CLICKHOUSE_DATABASE="test_db" - export CLICKHOUSE_USERNAME="user" - export CLICKHOUSE_PASSWORD="pass" - export SBOM_SOURCE="github" - export SBOM_FORMAT="cyclonedx" - export MERGE="false" - export INCLUDE="*.json" - export EXCLUDE="*-test.json" - export GITHUB_TOKEN="github-token" - - run sanitize_inputs - [ "$status" -eq 0 ] - [[ "$output" == *"Input sanitization completed successfully"* ]] -} - -# ============================================================================ -# CHECK_AND_MIGRATE_TABLE TESTS -# ============================================================================ - -# Test 73: check_and_migrate_table adds missing source column -@test "check_and_migrate_table adds missing source column" { - export CLICKHOUSE_DATABASE="test_db" - # Mock curl command that simulates column doesn't exist (returns 0) - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "curl called with: $*" >> "$BATS_TEST_TMPDIR/curl_calls.log" - -# Check what query is being executed -if [[ "$*" == *"system.columns"* ]] && [[ "$*" == *"name='source'"* ]]; then - # Column doesn't exist - echo "0" - exit 0 -elif [[ "$*" == *"ALTER TABLE"* ]] && [[ "$*" == *"ADD COLUMN source"* ]]; then - # ALTER TABLE succeeds - echo "ALTER TABLE executed" - exit 0 -else - # Other queries succeed - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test the migration function - run check_and_migrate_table "test_table" "http://clickhouse:8123" "-u user:pass" - [ "$status" -eq 0 ] - [[ "$output" == *"source column not found, migrating table: test_table"* ]] - [[ "$output" == *"source column added to table test_table"* ]] - - # Verify curl was called correctly - [ -f "$BATS_TEST_TMPDIR/curl_calls.log" ] - local curl_calls - curl_calls=$(cat "$BATS_TEST_TMPDIR/curl_calls.log") - - # Should have been called twice: once to check, once to alter - [[ "$curl_calls" == *"system.columns"* ]] - [[ "$curl_calls" == *"ALTER TABLE"* ]] - [[ "$curl_calls" == *"ADD COLUMN source LowCardinality(String) DEFAULT 
'unknown'"* ]] -} - -# Test 74: check_and_migrate_table skips migration when column exists -@test "check_and_migrate_table skips migration when column exists" { - export CLICKHOUSE_DATABASE="test_db" - # Mock curl command that simulates column exists (returns 1) - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "curl called with: $*" >> "$BATS_TEST_TMPDIR/curl_calls.log" - -# Check what query is being executed -if [[ "$*" == *"system.columns"* ]] && [[ "$*" == *"name='source'"* ]]; then - # Column exists - echo "1" - exit 0 -else - # Other queries succeed - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test the migration function - run check_and_migrate_table "existing_table" "http://clickhouse:8123" "-u user:pass" - [ "$status" -eq 0 ] - [[ "$output" == *"source column already exists in table existing_table"* ]] - [[ "$output" != *"migrating table"* ]] - - # Verify curl was called only once (to check) - [ -f "$BATS_TEST_TMPDIR/curl_calls.log" ] - local curl_calls - curl_calls=$(cat "$BATS_TEST_TMPDIR/curl_calls.log") - - # Should only have column check, no ALTER - [[ "$curl_calls" == *"system.columns"* ]] - [[ "$curl_calls" != *"ALTER TABLE"* ]] -} - -# Test 75: check_and_migrate_table handles column check failure -@test "check_and_migrate_table handles column check failure" { - # Mock curl command that fails on column check - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "curl called with: $*" >> "$BATS_TEST_TMPDIR/curl_calls.log" - -# Check what query is being executed -if [[ "$*" == *"system.columns"* ]]; then - # Column check fails - echo "Error: Connection failed" >&2 - exit 1 -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test the migration function - should fail - run check_and_migrate_table "test_table" "http://clickhouse:8123" "-u user:pass" - - [ "$status" -eq 1 ] - [[ "$output" == *"Failed to check column existence for table test_table"* ]] -} - -# Test 76: check_and_migrate_table handles ALTER TABLE failure -@test "check_and_migrate_table handles ALTER TABLE failure" { - export CLICKHOUSE_DATABASE="test_db" - # Mock curl command that succeeds on check but fails on ALTER - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "curl called with: $*" >> "$BATS_TEST_TMPDIR/curl_calls.log" - -# Check what query is being executed -if [[ "$*" == *"system.columns"* ]] && [[ "$*" == *"name='source'"* ]]; then - # Column doesn't exist - echo "0" - exit 0 -elif [[ "$*" == *"ALTER TABLE"* ]]; then - # ALTER TABLE fails - echo "Error: ALTER failed" >&2 - exit 1 -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test the migration function - should fail - run check_and_migrate_table "test_table" "http://clickhouse:8123" "-u user:pass" - [ "$status" -eq 1 ] - [[ "$output" == *"source column not found, migrating table: test_table"* ]] - [[ "$output" == *"Failed to add source column to table test_table"* ]] -} - -# Test 77: check_and_migrate_table uses correct database and table names -@test "check_and_migrate_table uses correct database and table names" { - export CLICKHOUSE_DATABASE="custom_db" - - # Mock curl command that captures the exact queries - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "QUERY: $*" >> "$BATS_TEST_TMPDIR/detailed_calls.log" - -if [[ "$*" == *"system.columns"* ]]; then - # Column doesn't exist - echo "0" - exit 0 -elif [[ "$*" == *"ALTER TABLE"* ]]; then - # ALTER TABLE succeeds - echo "ALTER success" - exit 0 -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test with custom database and table - run check_and_migrate_table 
"my_custom_table" "http://clickhouse:8123" "" - - [ "$status" -eq 0 ] - - # Verify the correct database and table names were used - [ -f "$BATS_TEST_TMPDIR/detailed_calls.log" ] - local detailed_calls - detailed_calls=$(cat "$BATS_TEST_TMPDIR/detailed_calls.log") - - # Check column query includes correct database and table - [[ "$detailed_calls" == *"database='custom_db'"* ]] - [[ "$detailed_calls" == *"table='my_custom_table'"* ]] - [[ "$detailed_calls" == *"name='source'"* ]] - - # Check ALTER query includes correct database and table - [[ "$detailed_calls" == *"ALTER TABLE custom_db.my_custom_table"* ]] - [[ "$detailed_calls" == *"ADD COLUMN source LowCardinality(String) DEFAULT 'unknown'"* ]] -} - -# Test 78: check_and_migrate_table handles authentication parameters correctly -@test "check_and_migrate_table handles authentication parameters correctly" { - export CLICKHOUSE_DATABASE="auth_db" - # Mock curl command that logs authentication - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -# Log all arguments to see auth parameters -echo "FULL_ARGS: $*" >> "$BATS_TEST_TMPDIR/auth_calls.log" - -# Extract auth parameters if present -for arg in "$@"; do - if [[ "$arg" == "-u" ]]; then - echo "AUTH_FOUND: -u" >> "$BATS_TEST_TMPDIR/auth_calls.log" - elif [[ "$arg" =~ ^user: ]]; then - echo "AUTH_CREDS: $arg" >> "$BATS_TEST_TMPDIR/auth_calls.log" - fi -done - -if [[ "$*" == *"system.columns"* ]]; then - echo "0" - exit 0 -elif [[ "$*" == *"ALTER TABLE"* ]]; then - exit 0 -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test with authentication parameters - run check_and_migrate_table "auth_table" "http://clickhouse:8123" "-u testuser:testpass" - [ "$status" -eq 0 ] - - # Verify authentication parameters were passed correctly - [ -f "$BATS_TEST_TMPDIR/auth_calls.log" ] - local auth_calls - auth_calls=$(cat "$BATS_TEST_TMPDIR/auth_calls.log") - - [[ "$auth_calls" == *"AUTH_FOUND: -u"* ]] - [[ "$auth_calls" == *"testuser:testpass"* ]] -} - -# Test 79: check_and_migrate_table handles empty auth parameters -@test "check_and_migrate_table handles empty auth parameters" { - export CLICKHOUSE_DATABASE="no_auth_db" - # Mock curl command - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "NO_AUTH_CALL: $*" >> "$BATS_TEST_TMPDIR/no_auth_calls.log" - -if [[ "$*" == *"system.columns"* ]]; then - echo "1" # Column exists - exit 0 -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Test with empty authentication - run check_and_migrate_table "no_auth_table" "http://clickhouse:8123" "" - [ "$status" -eq 0 ] - [[ "$output" == *"source column already exists"* ]] - - # Verify no auth parameters were passed - [ -f "$BATS_TEST_TMPDIR/no_auth_calls.log" ] - local no_auth_calls - no_auth_calls=$(cat "$BATS_TEST_TMPDIR/no_auth_calls.log") - - [[ "$no_auth_calls" != *"-u"* ]] -} - -# Test 80: check_and_migrate_table generates correct SQL with proper escaping -@test "check_and_migrate_table generates correct SQL with proper escaping" { - # Mock curl that captures exact SQL - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -# Capture the SQL data parameter -if [[ "$*" == *"--data"* ]]; then - # Find the --data parameter and log it - local capture_next=false - for arg in "$@"; do - if [[ "$capture_next" == "true" ]]; then - echo "SQL: $arg" >> "$BATS_TEST_TMPDIR/sql_calls.log" - capture_next=false - elif [[ "$arg" == "--data" ]]; then - capture_next=true - fi - done -fi - -if [[ "$*" == *"system.columns"* ]]; then - echo "0" # Column missing - exit 0 -elif [[ "$*" == *"ALTER TABLE"* ]]; then - exit 0 # ALTER 
succeeds -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - export CLICKHOUSE_DATABASE="test_db" - - # Test the function - run check_and_migrate_table "test_table" "http://clickhouse:8123" "" - - [ "$status" -eq 0 ] - - # Verify the SQL was generated correctly - [ -f "$BATS_TEST_TMPDIR/sql_calls.log" ] - local sql_calls - sql_calls=$(cat "$BATS_TEST_TMPDIR/sql_calls.log") - - # Check column existence query - [[ "$sql_calls" == *"SELECT COUNT(*) FROM system.columns"* ]] - [[ "$sql_calls" == *"database='test_db'"* ]] - [[ "$sql_calls" == *"table='test_table'"* ]] - [[ "$sql_calls" == *"name='source'"* ]] - - # Check ALTER TABLE query - [[ "$sql_calls" == *"ALTER TABLE test_db.test_table ADD COLUMN source LowCardinality(String) DEFAULT 'unknown'"* ]] -} - -# Test 81: check_and_migrate_table integration with setup_clickhouse_table -@test "check_and_migrate_table integrates properly with setup_clickhouse_table" { - # Mock curl for the complete workflow - cat > "$MOCK_DIR/curl" << 'EOF' -#!/bin/bash -echo "INTEGRATION_CALL: $*" >> "$BATS_TEST_TMPDIR/integration_calls.log" - -if [[ "$*" == *"SELECT 1"* ]]; then - # Connection test - echo "1" - exit 0 -elif [[ "$*" == *"system.tables"* ]]; then - # Table exists - echo "1" - exit 0 -elif [[ "$*" == *"system.columns"* ]]; then - # Column doesn't exist - echo "0" - exit 0 -elif [[ "$*" == *"ALTER TABLE"* ]]; then - # ALTER succeeds - exit 0 -else - exit 0 -fi -EOF - chmod +x "$MOCK_DIR/curl" - - # Set up environment for ClickHouse - export CLICKHOUSE_URL="http://localhost:8123" - export CLICKHOUSE_DATABASE="integration_test" - export CLICKHOUSE_USERNAME="testuser" - export CLICKHOUSE_PASSWORD="testpass" - export DEBUG="true" - - # Test setup_clickhouse_table which should call check_and_migrate_table - run setup_clickhouse_table "integration_table" - [ "$status" -eq 0 ] - [[ "$output" == *"Table integration_table already exists"* ]] - [[ "$output" == *"source column not found, migrating table"* ]] - [[ "$output" == *"source column added to table integration_table"* ]] - - # Verify the complete workflow was executed - [ -f "$BATS_TEST_TMPDIR/integration_calls.log" ] - local integration_calls - integration_calls=$(cat "$BATS_TEST_TMPDIR/integration_calls.log") - - # Should have connection test, table check, column check, and ALTER - [[ "$integration_calls" == *"SELECT 1"* ]] - [[ "$integration_calls" == *"system.tables"* ]] - [[ "$integration_calls" == *"system.columns"* ]] - [[ "$integration_calls" == *"ALTER TABLE"* ]] -} - -# ============================================================================ -# TESTS FOR extract_sbom_source_reference -# ============================================================================ - -# Test 82: extract_sbom_source_reference finds spdx document name from GitHub SBOM -@test "extract_sbom_source_reference finds spdx document name from GitHub SBOM" { - # Create a GitHub-style SBOM with spdx:document:name - local test_sbom="$TEST_TEMP_DIR/github_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "metadata": { - "timestamp": "2025-08-03T17:52:15Z", - "tools": [ - { - "name": "protobom-v0.0.0-20250731140552", - "version": "613e75aeb253+dirty" - }, - { - "name": "GitHub.com-Dependency", - "version": "Graph" - } - ], - "properties": [ - { - "name": "spdx:spdxid", - "value": "SPDXRef-DOCUMENT" - }, - { - "name": "spdx:document:spdx-version", - "value": "SPDX-2.2" - }, - { - "name": "spdx:document:name", - "value": "com.github.ClickHouse/clickhouse-js" - }, - { - 
"name": "spdx:document:document-namespace", - "value": "https://spdx.org/spdxdocs/protobom/f00b0bff-1270-4c18-aae2-8c69fab0d995" - } - ] - } -} -EOF - - # Test the function - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "com.github.ClickHouse/clickhouse-js" ] -} - -# Test 83: extract_sbom_source_reference finds component name from Wiz SBOM -@test "extract_sbom_source_reference finds component name from Wiz SBOM" { - # Create a Wiz-style SBOM with metadata.component.name - local test_sbom="$TEST_TEMP_DIR/wiz_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "serialNumber": "urn:uuid:4cda9e9d4700eae5ccb87782970170e4", - "version": 1, - "metadata": { - "timestamp": "2025-08-03T17:54:23Z", - "tools": [ - { - "vendor": "ClickBOM", - "name": "cyclonedx-merge", - "version": "1.0.10" - } - ], - "component": { - "type": "application", - "name": "wiz-merged-sbom", - "version": "1.0.0" - } - } -} -EOF - - # Test the function - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "wiz-merged-sbom" ] -} - -# Test 84: extract_sbom_source_reference finds bom-ref from Mend SBOM -@test "extract_sbom_source_reference finds bom-ref from Mend SBOM" { - # Create a Mend-style SBOM with metadata.component.bom-ref - local test_sbom="$TEST_TEMP_DIR/mend_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "serialNumber": "urn:uuid:1fb54e69-e58c-49c0-81a7-49cbb81a54ed", - "version": 1, - "metadata": { - "timestamp": "2025-08-03T17:54:34Z", - "tools": { - "components": [{ - "author": "Mend.io", - "name": "CycloneDX report generator", - "version": "1.0.0", - "type": "application" - }] - }, - "authors": [{ - "name": "Organization: ClickHouse" - }, { - "name": "Person: sbom_download (sbom_download@clickhouse.com)" - }], - "component": { - "name": "master-branch", - "type": "application", - "bom-ref": "5ee38db1-6bec-449c-9908-070b77ac10db" - }, - "properties": [{ - "name": "reportName", - "value": "test" - }] - } -} -EOF - - # Test the function - should prefer component.name over bom-ref - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "master-branch" ] -} - -# Test 85: extract_sbom_source_reference finds bom-ref when component name is missing -@test "extract_sbom_source_reference finds bom-ref when component name is missing" { - # Create a SBOM with only bom-ref - local test_sbom="$TEST_TEMP_DIR/bomref_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "metadata": { - "component": { - "type": "application", - "bom-ref": "5ee38db1-6bec-449c-9908-070b77ac10db" - } - } -} -EOF - - # Test the function - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "5ee38db1-6bec-449c-9908-070b77ac10db" ] -} - -# Test 86: extract_sbom_source_reference finds top-level name field -@test "extract_sbom_source_reference finds top-level name field" { - # Create a SBOM with top-level name - local test_sbom="$TEST_TEMP_DIR/toplevel_name_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "name": "my-project-sbom", - "metadata": { - "timestamp": "2025-08-03T17:52:15Z" - } -} -EOF - - # Test the function - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "my-project-sbom" ] -} - -# Test 87: 
extract_sbom_source_reference finds tool name hint -@test "extract_sbom_source_reference finds tool name hint" { - # Create a SBOM with custom tool name - local test_sbom="$TEST_TEMP_DIR/tool_hint_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "metadata": { - "timestamp": "2025-08-03T17:52:15Z", - "tools": [ - { - "name": "my-custom-scanner", - "version": "1.2.3" - }, - { - "name": "GitHub.com-Dependency", - "version": "Graph" - } - ] - } -} -EOF - - # Test the function - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "my-custom-scanner" ] -} - -# Test 88: extract_sbom_source_reference ignores common tool names -@test "extract_sbom_source_reference ignores common tool names" { - # Create a SBOM with only common tool names that should be ignored - local test_sbom="$TEST_TEMP_DIR/common_tools_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "metadata": { - "timestamp": "2025-08-03T17:52:15Z", - "tools": [ - { - "name": "GitHub.com-Dependency", - "version": "Graph" - }, - { - "name": "protobom-v1.0.0", - "version": "1.0.0" - }, - { - "name": "CycloneDX", - "version": "1.6" - } - ] - } -} -EOF - - # Test the function - should use fallback since all tools are ignored - run extract_sbom_source_reference "$test_sbom" "my-fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "my-fallback" ] -} - -# Test 89: extract_sbom_source_reference uses fallback filename -@test "extract_sbom_source_reference uses fallback filename" { - # Create a minimal SBOM with no identifying information - local test_sbom="$TEST_TEMP_DIR/minimal_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6" -} -EOF - - # Test the function with fallback - run extract_sbom_source_reference "$test_sbom" "my-project.json" - [ "$status" -eq 0 ] - [ "$output" = "my-project" ] -} - -# Test 90: extract_sbom_source_reference uses unknown when no fallback -@test "extract_sbom_source_reference uses unknown when no fallback" { - # Create a minimal SBOM with no identifying information - local test_sbom="$TEST_TEMP_DIR/minimal_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6" -} -EOF - - # Test the function without fallback - run extract_sbom_source_reference "$test_sbom" "" - [ "$status" -eq 0 ] - [[ "$output" == *"unknown"* ]] -} - -# Test 91: extract_sbom_source_reference prioritizes strategies correctly -@test "extract_sbom_source_reference prioritizes strategies correctly" { - # Create a SBOM with multiple potential sources to test priority - local test_sbom="$TEST_TEMP_DIR/priority_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "name": "top-level-name", - "metadata": { - "timestamp": "2025-08-03T17:52:15Z", - "tools": [ - { - "name": "ClickBOM", - "version": "1.0.10" - } - ], - "component": { - "type": "application", - "name": "component-name", - "bom-ref": "some-bom-ref" - }, - "properties": [ - { - "name": "spdx:document:name", - "value": "spdx-document-name" - } - ] - } -} -EOF - - # Test the function - should prioritize spdx:document:name (Strategy 1) - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "spdx-document-name" ] -} - -# Test 92: extract_sbom_source_reference handles invalid JSON -@test "extract_sbom_source_reference handles invalid JSON gracefully" { - # Create an invalid JSON file - 
local test_sbom="$TEST_TEMP_DIR/invalid_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6" - # invalid comment -} -EOF - - # Test the function - should use fallback when jq fails - run extract_sbom_source_reference "$test_sbom" "fallback-name.json" - [ "$status" -eq 0 ] - [ "$output" = "fallback-name" ] -} - -# Test 93: extract_sbom_source_reference handles empty values gracefully -@test "extract_sbom_source_reference handles empty values gracefully" { - # Create a SBOM with empty/null values - local test_sbom="$TEST_TEMP_DIR/empty_values_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "name": "", - "metadata": { - "component": { - "name": null, - "bom-ref": "" - }, - "properties": [ - { - "name": "spdx:document:name", - "value": "" - } - ] - } -} -EOF - - # Test the function - should use fallback when values are empty/null - run extract_sbom_source_reference "$test_sbom" "fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "fallback" ] -} - -# Test 94: extract_sbom_source_reference handles missing file -@test "extract_sbom_source_reference handles missing file" { - # Test with non-existent file - run extract_sbom_source_reference "/nonexistent/file.json" "missing-fallback.json" - [ "$status" -eq 0 ] - [ "$output" = "missing-fallback" ] -} - -# ============================================================================ -# TESTS FOR collect_components_with_source -# ============================================================================ - -# Test 95: collect_components_with_source adds source to components -@test "collect_components_with_source adds source to components" { - # Create a SBOM with components - local test_sbom="$TEST_TEMP_DIR/components_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "lodash", - "version": "4.17.21", - "type": "library", - "licenses": [ - { - "license": { - "id": "MIT" - } - } - ] - }, - { - "name": "express", - "version": "4.18.2", - "type": "library", - "licenses": [ - { - "license": { - "id": "MIT" - } - } - ] - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/output_components.json" - - # Test the function - run collect_components_with_source "$test_sbom" "test-source-ref" "$output_file" - - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # With compact JSON output (-c flag), each component should be on one line - local component_count - component_count=$(wc -l < "$output_file") - [ "$component_count" -eq 2 ] - - # Check that both components have the source field - # Each line is a separate JSON object, so we need to check each line - local lodash_found=false - local express_found=false - - while IFS= read -r line; do - if [[ -n "$line" ]]; then - local name - name=$(echo "$line" | jq -r '.name' 2>/dev/null || echo "") - local source - source=$(echo "$line" | jq -r '.source' 2>/dev/null || echo "") - - if [[ "$name" == "lodash" && "$source" == "test-source-ref" ]]; then - lodash_found=true - # Verify original fields are preserved - local version - version=$(echo "$line" | jq -r '.version' 2>/dev/null || echo "") - [ "$version" = "4.17.21" ] - elif [[ "$name" == "express" && "$source" == "test-source-ref" ]]; then - express_found=true - fi - fi - done < "$output_file" - - [ "$lodash_found" = true ] - [ "$express_found" = true ] -} - -# Test 96: collect_components_with_source handles SBOM with no components -@test "collect_components_with_source handles SBOM with no components" 
{ - # Create a SBOM with no components array - local test_sbom="$TEST_TEMP_DIR/no_components_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "metadata": { - "component": { - "name": "test-project", - "version": "1.0.0" - } - } -} -EOF - - local output_file="$TEST_TEMP_DIR/empty_output.json" - - # Test the function - run collect_components_with_source "$test_sbom" "test-source" "$output_file" - [ "$status" -eq 0 ] # Should fail but not crash - [ -f "$output_file" ] # Should create empty file -} - -# Test 97: collect_components_with_source handles empty components array -@test "collect_components_with_source handles empty components array" { - # Create a SBOM with empty components array - local test_sbom="$TEST_TEMP_DIR/empty_components_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [] -} -EOF - - local output_file="$TEST_TEMP_DIR/empty_components_output.json" - - # Test the function - run collect_components_with_source "$test_sbom" "test-source" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Output file should be empty (no components to process) - [ ! -s "$output_file" ] # File should be empty -} - -# Test 98: collect_components_with_source handles components with existing source field -@test "collect_components_with_source overwrites existing source field" { - # Create a SBOM with components that already have source fields - local test_sbom="$TEST_TEMP_DIR/existing_source_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "lodash", - "version": "4.17.21", - "source": "old-source", - "type": "library" - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/overwrite_source_output.json" - - # Test the function - run collect_components_with_source "$test_sbom" "new-source-ref" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Check that the source field was overwritten - local component_source=$(jq -r '.source' "$output_file") - [ "$component_source" = "new-source-ref" ] -} - -# Test 99: collect_components_with_source handles components with complex structure -@test "collect_components_with_source preserves complex component structure" { - # Create a SBOM with complex components - local test_sbom="$TEST_TEMP_DIR/complex_components_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "@types/node", - "version": "18.15.0", - "type": "library", - "scope": "optional", - "purl": "pkg:npm/%40types/node@18.15.0", - "licenses": [ - { - "license": { - "id": "MIT" - } - } - ], - "properties": [ - { - "name": "cdx:npm:package:path", - "value": "node_modules/@types/node" - } - ], - "externalReferences": [ - { - "type": "website", - "url": "https://github.com/DefinitelyTyped/DefinitelyTyped.git" - } - ] - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/complex_output.json" - - # Test the function - run collect_components_with_source "$test_sbom" "complex-source" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Verify all original fields are preserved - local component_name=$(jq -r '.name' "$output_file") - [ "$component_name" = "@types/node" ] - - local component_purl=$(jq -r '.purl' "$output_file") - [ "$component_purl" = "pkg:npm/%40types/node@18.15.0" ] - - local component_properties_count=$(jq '.properties | length' "$output_file") - [ "$component_properties_count" -eq 1 ] - - local 
component_refs_count=$(jq '.externalReferences | length' "$output_file") - [ "$component_refs_count" -eq 1 ] - - # Verify source was added - local component_source=$(jq -r '.source' "$output_file") - [ "$component_source" = "complex-source" ] -} - -# Test 100: collect_components_with_source handles invalid JSON -@test "collect_components_with_source handles invalid JSON gracefully" { - # Create an invalid JSON file - local test_sbom="$TEST_TEMP_DIR/invalid_json.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "components": [ - { - "name": "test" - # missing comma - "version": "1.0.0" - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/invalid_json_output.json" - - # Test the function - run collect_components_with_source "$test_sbom" "source-ref" "$output_file" - [ "$status" -eq 1 ] - [ -f "$output_file" ] # Should create empty file -} - -# Test 101: collect_components_with_source handles missing input file -@test "collect_components_with_source handles missing input file" { - local output_file="$TEST_TEMP_DIR/missing_input_output.json" - - # Test with non-existent input file - run collect_components_with_source "/nonexistent/file.json" "source-ref" "$output_file" - [ "$status" -eq 1 ] - [ -f "$output_file" ] # Should create empty file -} - -# Test 102: collect_components_with_source handles special characters in source -@test "collect_components_with_source handles special characters in source" { - # Create a SBOM with components - local test_sbom="$TEST_TEMP_DIR/special_chars_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "test-component", - "version": "1.0.0", - "type": "library" - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/special_chars_output.json" - local special_source="com.github.ClickHouse/clickhouse-js@main:v2.0.0" - - # Test the function with special characters in source - run collect_components_with_source "$test_sbom" "$special_source" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Check that special characters are preserved - local component_source=$(jq -r '.source' "$output_file") - [ "$component_source" = "$special_source" ] -} - -# Test 103: collect_components_with_source produces valid JSON output -@test "collect_components_with_source produces valid JSON output" { - # Create a SBOM with multiple components - local test_sbom="$TEST_TEMP_DIR/multi_components_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "component1", - "version": "1.0.0" - }, - { - "name": "component2", - "version": "2.0.0" - }, - { - "name": "component3", - "version": "3.0.0" - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/valid_json_output.json" - - # Test the function - run collect_components_with_source "$test_sbom" "multi-source" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # With compact JSON output, each component should be on one line - local line_count - line_count=$(wc -l < "$output_file") - [ "$line_count" -eq 3 ] - - # Verify each line is valid JSON and has the source field - local components_with_source=0 - local valid_json_lines=0 - - while IFS= read -r line; do - if [[ -n "$line" ]]; then - # Verify it's valid JSON (each line should be a JSON object) - local line_type - line_type=$(echo "$line" | jq -r 'type' 2>/dev/null || echo "invalid") - if [[ "$line_type" == "object" ]]; then - valid_json_lines=$((valid_json_lines + 1)) - - # Check if it has source 
field - local has_source - has_source=$(echo "$line" | jq 'has("source")' 2>/dev/null || echo "false") - if [[ "$has_source" == "true" ]]; then - components_with_source=$((components_with_source + 1)) - fi - fi - fi - done < "$output_file" - - # All lines should be valid JSON objects - [ "$valid_json_lines" -eq 3 ] - # All components should have the source field - [ "$components_with_source" -eq 3 ] -} - -# Test 104: collect_components_with_source handles unicode in source reference -@test "collect_components_with_source handles unicode in source reference" { - # Create a simple SBOM - local test_sbom="$TEST_TEMP_DIR/unicode_source_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "components": [ - { - "name": "test-component", - "version": "1.0.0" - } - ] -} -EOF - - local output_file="$TEST_TEMP_DIR/unicode_source_output.json" - local unicode_source="ๆต‹่ฏ•้กน็›ฎ/test-project" - - # Test the function with unicode in source - run collect_components_with_source "$test_sbom" "$unicode_source" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Check that unicode is preserved (this may depend on locale settings) - local component_source=$(jq -r '.source' "$output_file") - [ "$component_source" = "$unicode_source" ] -} - -# Test 105: Integration test - extract source and collect components -@test "integration test - extract source and collect components work together" { - # Create a realistic GitHub SBOM - local test_sbom="$TEST_TEMP_DIR/integration_sbom.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "metadata": { - "properties": [ - { - "name": "spdx:document:name", - "value": "com.github.ClickHouse/clickhouse-js" - } - ] - }, - "components": [ - { - "name": "lodash", - "version": "4.17.21", - "type": "library" - }, - { - "name": "express", - "version": "4.18.2", - "type": "library" - } - ] -} -EOF - - # First extract the source reference - local extracted_source - extracted_source=$(extract_sbom_source_reference "$test_sbom" "fallback.json") - - [ "$extracted_source" = "com.github.ClickHouse/clickhouse-js" ] - - # Then collect components with that source - local output_file="$TEST_TEMP_DIR/integration_output.json" - run collect_components_with_source "$test_sbom" "$extracted_source" "$output_file" - [ "$status" -eq 0 ] - [ -f "$output_file" ] - - # Verify both components have the extracted source - local lodash_source=$(jq -r '. | select(.name == "lodash") | .source' "$output_file") - [ "$lodash_source" = "com.github.ClickHouse/clickhouse-js" ] - - local express_source=$(jq -r '. 
| select(.name == "express") | .source' "$output_file") - [ "$express_source" = "com.github.ClickHouse/clickhouse-js" ] -} diff --git a/test/simple.bats b/test/simple.bats deleted file mode 100644 index e3c28d4..0000000 --- a/test/simple.bats +++ /dev/null @@ -1,832 +0,0 @@ -#!/usr/bin/env bats - -# test/simple.bats -# Simple BATS tests for entrypoint.sh - -# Setup function runs before each test -setup() { - # Load the script to test (source it to access functions) - # We'll source only the functions, not execute main - export BATS_TEST_DIRNAME="$(cd "$(dirname "$BATS_TEST_FILENAME")" && pwd)" - export PROJECT_ROOT="$(dirname "$BATS_TEST_DIRNAME")" - - # Create a temporary test script that sources functions without executing main - export TEST_SCRIPT="$BATS_TEST_TMPDIR/test_entrypoint.sh" - - # Extract only the functions from entrypoint.sh (everything before main function call) - sed '/^# Run main function/,$d' "$PROJECT_ROOT/entrypoint.sh" > "$TEST_SCRIPT" - - # Replace the source line in the extracted script - sed -i "s|source \"\$SCRIPT_DIR/lib/sanitize.sh\"|source \"$PROJECT_ROOT/lib/sanitize.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/common.sh\"|source \"$PROJECT_ROOT/lib/common.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/validation.sh\"|source \"$PROJECT_ROOT/lib/validation.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/github.sh\"|source \"$PROJECT_ROOT/lib/github.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/mend.sh\"|source \"$PROJECT_ROOT/lib/mend.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/wiz.sh\"|source \"$PROJECT_ROOT/lib/wiz.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/sbom-processing.sh\"|source \"$PROJECT_ROOT/lib/sbom-processing.sh\"|" "$TEST_SCRIPT" - sed -i "s|source \"\$SCRIPT_DIR/lib/sbom-merging.sh\"|source \"$PROJECT_ROOT/lib/sbom-merging.sh\"|" "$TEST_SCRIPT" - - # Source the functions - source "$TEST_SCRIPT" - - # Set up test environment variables - export AWS_ACCESS_KEY_ID="test-key" - export AWS_SECRET_ACCESS_KEY="test-secret" - export S3_BUCKET="test-bucket" - export REPOSITORY="test-owner/test-repo" - export GITHUB_TOKEN="test-token" -} - -# Teardown function runs after each test -teardown() { - # Clean up any test files or variables if needed - unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY S3_BUCKET REPOSITORY GITHUB_TOKEN -} - -# Test 1: Basic test to verify BATS is working -@test "basic test - addition" { - result="$((2 + 2))" - [ "$result" -eq 4 ] -} - -# Test 2: Basic logging functions work -@test "logging functions produce colored output" { - # Test that log_info produces expected output format - run log_info "test message" - - # Check that the command succeeded (exit code 0) - [ "$status" -eq 0 ] - - # Check that output contains the expected format - [[ "$output" == *"[INFO]"* ]] - [[ "$output" == *"test message"* ]] -} - -# Test 3: log_error produces error message -@test "log_error produces error message" { - run log_error "error message" - - [ "$status" -eq 0 ] - [[ "$output" == *"[ERROR]"* ]] - [[ "$output" == *"error message"* ]] -} - -# Test 4: Environment validation -@test "validate_env succeeds with all required variables" { - # All required variables are already set in setup() - run validate_env - - [ "$status" -eq 0 ] -} - -# Test 5: validate_env fails when AWS_SECRET_ACCESS_KEY is missing -@test "validate_env fails when AWS_ACCESS_KEY_ID is missing" { - unset AWS_ACCESS_KEY_ID - - run validate_env - - # Should exit with code 1 - [ "$status" -eq 1 ] - [[ 
"$output" == *"Required environment variable AWS_ACCESS_KEY_ID is not set"* ]] -} - -# Test 6: validate_env fails when AWS_SECRET_ACCESS_KEY is missing -@test "validate_env fails when S3_BUCKET is missing" { - unset S3_BUCKET - - run validate_env - - [ "$status" -eq 1 ] - [[ "$output" == *"Required environment variable S3_BUCKET is not set"* ]] -} - -# Test 7: CycloneDXSBOM format detection -@test "detect_sbom_format identifies CycloneDX format" { - # Create a temporary CycloneDX SBOM file - local test_sbom="$BATS_TEST_TMPDIR/cyclonedx_test.json" - cat > "$test_sbom" << 'EOF' -{ - "bomFormat": "CycloneDX", - "specVersion": "1.6", - "metadata": { - "component": { - "name": "test" - } - } -} -EOF - run detect_sbom_format "$test_sbom" - - [ "$status" -eq 0 ] - [ "$output" = "cyclonedx" ] -} - -# Test 8: SPDX format detection -@test "detect_sbom_format identifies SPDX format" { - # Create a temporary SPDX SBOM file - local test_sbom="$BATS_TEST_TMPDIR/spdx_test.json" - cat > "$test_sbom" << 'EOF' -{ - "spdxVersion": "SPDX-2.2", - "SPDXID": "SPDXRef-DOCUMENT" -} -EOF - - run detect_sbom_format "$test_sbom" - - [ "$status" -eq 0 ] - [ "$output" = "spdxjson" ] -} - -# Test 9: Mend environment validation -@test "validate_mend_env succeeds with all required Mend variables when SBOM_SOURCE is mend" { - export SBOM_SOURCE="mend" - export MEND_EMAIL="test@example.com" - export MEND_ORG_UUID="test-org-uuid" - export MEND_USER_KEY="test-user-key" - export MEND_BASE_URL="https://saas.mend.io" - export MEND_PROJECT_UUID="test-project-uuid" - - run validate_mend_env - - [ "$status" -eq 0 ] - [[ "$output" == *"Mend environment validated"* ]] -} - -# Test 10: validate_mend_env fails when MEND_EMAIL is missing and SBOM_SOURCE is mend -@test "validate_mend_env is skipped when SBOM_SOURCE is not mend" { - export SBOM_SOURCE="github" - - run validate_mend_env - - [ "$status" -eq 0 ] - # Should not contain Mend validation messages since source is github -} - -# Test 11: validate_mend_env fails when MEND_EMAIL is missing -@test "validate_mend_env fails when MEND_EMAIL is missing" { - export SBOM_SOURCE="mend" - export MEND_ORG_UUID="test-org-uuid" - export MEND_USER_KEY="test-user-key" - export MEND_BASE_URL="https://saas.mend.io" - export MEND_PROJECT_UUID="test-project-uuid" - # MEND_EMAIL is intentionally not set - - run validate_mend_env - - [ "$status" -eq 1 ] - [[ "$output" == *"Required Mend environment variable MEND_EMAIL is not set"* ]] -} - -# Test 12: matches_pattern function with exact filename -@test "matches_pattern works with exact filename" { - run matches_pattern "test.json" "test.json" - [ "$status" -eq 0 ] - - run matches_pattern "test.json" "other.json" - [ "$status" -eq 1 ] -} - -# Test 13: matches_pattern function with wildcard patterns -@test "matches_pattern works with wildcard patterns" { - run matches_pattern "test-prod.json" "*-prod.json" - [ "$status" -eq 0 ] - - run matches_pattern "production-test.json" "production-*.json" - [ "$status" -eq 0 ] - - run matches_pattern "test-dev.json" "*-prod.json" - [ "$status" -eq 1 ] -} - -# Test 14: matches_pattern function with multiple patterns -@test "matches_pattern works with multiple comma-separated patterns" { - run matches_pattern "test-prod.json" "test.json,*-prod.json,other.json" - [ "$status" -eq 0 ] - - run matches_pattern "production-test.json" "test.json,production-*.json,other.json" - [ "$status" -eq 0 ] - - run matches_pattern "random.json" "test.json,*-prod.json,other.json" - [ "$status" -eq 1 ] -} - -# Test 15: matches_pattern 
function with empty patterns -@test "matches_pattern returns false for empty patterns" { - run matches_pattern "test.json" "" - [ "$status" -eq 1 ] -} - -# Test 16: filter_files function with include only -@test "filter_files works with include patterns only" { - local test_files="test-prod.json"$'\n'"test-dev.json"$'\n'"production-main.json" - - export INCLUDE="*-prod.json,production-*.json" - export EXCLUDE="" - - local result=$(filter_files "$test_files") - - # Should include test-prod.json and production-main.json - [[ "$result" =~ test-prod.json ]] - [[ "$result" =~ production-main.json ]] - [[ ! "$result" =~ test-dev.json ]] -} - -# Test 17: filter_files function with exclude only -@test "filter_files works with exclude patterns only" { - local test_files="test-prod.json"$'\n'"test-dev.json"$'\n'"production-main.json" - - export INCLUDE="" - export EXCLUDE="*-dev.json" - - local result=$(filter_files "$test_files") - - # Should exclude test-dev.json but include others - [[ "$result" =~ test-prod.json ]] - [[ "$result" =~ production-main.json ]] - [[ ! "$result" =~ test-dev.json ]] -} - -# Test 18: filter_files function with both include and exclude -@test "filter_files works with both include and exclude patterns" { - local test_files="test-prod.json"$'\n'"test-dev.json"$'\n'"production-main.json"$'\n'"production-test.json" - - export INCLUDE="*-prod.json,production-*.json" - export EXCLUDE="*-test.json" - - local result=$(filter_files "$test_files") - - # Should include test-prod.json and production-main.json - # Should exclude test-dev.json (not in include) and production-test.json (in exclude) - [[ "$result" =~ test-prod.json ]] - [[ "$result" =~ production-main.json ]] - [[ ! "$result" =~ test-dev.json ]] - [[ ! "$result" =~ production-test.json ]] -} - -# Test 19: filter_files function with no patterns (should return all files) -@test "filter_files returns all files when no patterns specified" { - local test_files="test-prod.json"$'\n'"test-dev.json"$'\n'"production-main.json" - - export INCLUDE="" - export EXCLUDE="" - - local result=$(filter_files "$test_files") - - # Should include all files - [[ "$result" =~ test-prod.json ]] - [[ "$result" =~ test-dev.json ]] - [[ "$result" =~ production-main.json ]] -} - -# Test 20: filter_files function with empty file list -@test "filter_files handles empty file list" { - local test_files="" - - export INCLUDE="*.json" - export EXCLUDE="" - - local result=$(filter_files "$test_files") - - # Should return empty result - [[ -z "$result" ]] -} - -# Test 21: filter_files function with whitespace in patterns -@test "filter_files handles whitespace in patterns correctly" { - local test_files="test-prod.json"$'\n'"test-dev.json" - - export INCLUDE=" *-prod.json , production-*.json " - export EXCLUDE="" - - local result=$(filter_files "$test_files") - - # Should include test-prod.json (whitespace should be trimmed) - [[ "$result" =~ test-prod.json ]] - [[ ! 
"$result" =~ test-dev.json ]] -} - -# Test 22: sanitize_string removes dangerous characters -@test "sanitize_string removes dangerous characters" { - run sanitize_string "test\$command\`echo hello\`" - [ "$status" -eq 0 ] - [[ "$output" == "testcommandecho hello" ]] -} - -# Test 23: sanitize_string removes null bytes and control characters -@test "sanitize_string removes control characters" { - # Test string with null byte, control characters - local test_string=$(printf "test\000string\001\002\003") - run sanitize_string "$test_string" - [ "$status" -eq 0 ] - [[ "$output" == "teststring" ]] -} - -# Test 24: sanitize_string limits length -@test "sanitize_string respects length limit" { - local long_string=$(printf 'a%.0s' {1..2000}) - run sanitize_string "$long_string" 100 - [ "$status" -eq 0 ] - [ "${#output}" -eq 100 ] -} - -# Test 25: sanitize_string removes shell metacharacters -@test "sanitize_string removes shell metacharacters" { - run sanitize_string "test|command;rm -rf /&" - [ "$status" -eq 0 ] - [[ "$output" == "testcommandrm -rf /" ]] -} - -# Test 26: sanitize_string preserves safe characters -@test "sanitize_string preserves safe characters" { - run sanitize_string "test-string_with.safe@characters123" - [ "$status" -eq 0 ] - [[ "$output" == "test-string_with.safecharacters123" ]] -} - -# Test 27: sanitize_repository valid input -@test "sanitize_repository accepts valid repository format" { - run sanitize_repository "owner/repo" - [ "$status" -eq 0 ] - [[ "$output" == "owner/repo" ]] -} - -# Test 28: sanitize_repository accepts repository with hyphens and underscores -@test "sanitize_repository accepts repository with hyphens and underscores" { - run sanitize_repository "my-org/my_repo-name" - [ "$status" -eq 0 ] - [[ "$output" == "my-org/my_repo-name" ]] -} - -# Test 29: sanitize_repository accepts repository with dots -@test "sanitize_repository accepts repository with dots" { - run sanitize_repository "my.org/repo.name" - [ "$status" -eq 0 ] - [[ "$output" == "my.org/repo.name" ]] -} - -# Test 30: sanitize_repository removes dangerous characters -@test "sanitize_repository removes dangerous characters" { - run sanitize_repository "owner\$bad/repo;rm" - [ "$status" -eq 0 ] - [[ "$output" == "ownerbad/reporm" ]] -} - -# Test 31: sanitize_repository rejects invalid format - special characters -@test "sanitize_repository rejects invalid format - no slash" { - run sanitize_repository "invalidrepo" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] -} - -# Test 32: sanitize_repository rejects invalid format - multiple slashes -@test "sanitize_repository rejects invalid format - multiple slashes" { - run sanitize_repository "owner/repo/extra" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] -} - -# Test 33: sanitize_repository rejects invalid format - empty owner or repo -@test "sanitize_repository rejects empty owner or repo" { - run sanitize_repository "/repo" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] - - run sanitize_repository "owner/" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid repository format"* ]] -} - -# Test 34: sanitize_url accepts valid HTTP URL -@test "sanitize_url accepts valid HTTP URL" { - run sanitize_url "http://example.com" - [ "$status" -eq 0 ] - [[ "$output" == "http://example.com" ]] -} - -# Test 35: sanitize_url accepts valid HTTPS URL -@test "sanitize_url accepts valid HTTPS URL" { - run sanitize_url "https://api.example.com:8080" - [ "$status" -eq 0 ] - [[ "$output" == 
"https://api.example.com:8080" ]] -} - -# Test 36: sanitize_url accepts valid ClickHouse URL -@test "sanitize_url accepts ClickHouse URL format" { - run sanitize_url "https://clickhouse.example.com:8443" "clickhouse" - [ "$status" -eq 0 ] - [[ "$output" == "https://clickhouse.example.com:8443" ]] -} - -# Test 37: sanitize_url enforces HTTPS for Mend URLs -@test "sanitize_url enforces HTTPS for Mend URLs" { - run sanitize_url "https://api.mend.io/path" "mend" - [ "$status" -eq 0 ] - [[ "$output" == "https://api.mend.io/path" ]] -} - -# Test 38: sanitize_url rejects non-HTTPS for Mend URLs -@test "sanitize_url rejects HTTP for Mend URLs" { - run sanitize_url "http://api.mend.io" "mend" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid Mend URL format"* ]] -} - -# Test 39: sanitize_url enforces HTTPS for Wiz URLs -@test "sanitize_url enforces HTTPS for Wiz URLs" { - run sanitize_url "https://api.wiz.io/graphql" "wiz" - [ "$status" -eq 0 ] - [[ "$output" == "https://api.wiz.io/graphql" ]] -} - -# Test 40: sanitize_url rejects invalid URL format -@test "sanitize_url rejects invalid URL format" { - run sanitize_url "not-a-url" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 41: sanitize_url rejects FTP URLs -@test "sanitize_url rejects FTP URLs" { - run sanitize_url "ftp://example.com" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid URL format"* ]] -} - -# Test 42: sanitize_url removes control characters -@test "sanitize_url removes control characters" { - local test_url=$(printf "https://example.com\001\002") - run sanitize_url "$test_url" - [ "$status" -eq 0 ] - [[ "$output" == "https://example.com" ]] -} - -# Test 43: sanitize_s3_bucket accepts valid bucket name -@test "sanitize_s3_bucket accepts valid bucket name" { - run sanitize_s3_bucket "my-test-bucket" - [ "$status" -eq 0 ] - [[ "$output" == "my-test-bucket" ]] -} - -# Test 44: sanitize_s3_bucket converts bucket name to lowercase -@test "sanitize_s3_bucket converts to lowercase" { - run sanitize_s3_bucket "My-Test-Bucket" - [ "$status" -eq 0 ] - [[ "$output" == "my-test-bucket" ]] -} - -# Test 45: sanitize_s3_bucket accepts bucket with dots -@test "sanitize_s3_bucket accepts bucket with dots" { - run sanitize_s3_bucket "my.test.bucket" - [ "$status" -eq 0 ] - [[ "$output" == "my.test.bucket" ]] -} - -# Test 46: sanitize_s3_bucket removes invalid characters -@test "sanitize_s3_bucket removes invalid characters" { - run sanitize_s3_bucket "my_test@bucket!" 
- [ "$status" -eq 0 ] - [[ "$output" == "mytestbucket" ]] -} - -# Test 47: sanitize_s3_bucket rejects short bucket name -@test "sanitize_s3_bucket rejects too short name" { - run sanitize_s3_bucket "ab" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid S3 bucket name"* ]] -} - -# Test 48: sanitize_s3_bucket rejects long bucket name -@test "sanitize_s3_bucket rejects too long name" { - local long_name=$(printf 'a%.0s' {1..70}) - run sanitize_s3_bucket "$long_name" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid S3 bucket name"* ]] -} - -# Test 49: sanitize_s3_bucket rejects IP-like format -@test "sanitize_s3_bucket rejects IP-like format" { - run sanitize_s3_bucket "192.168.1.1" - [ "$status" -eq 1 ] - [[ "$output" == *"cannot be formatted as IP address"* ]] -} - -# Test 50: sanitize_s3_bucket rejects bucket starting with dash -@test "sanitize_s3_bucket rejects bucket starting with dash" { - run sanitize_s3_bucket "-invalid-bucket" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid S3 bucket name"* ]] -} - -# Test 51: sanitize_s3_bucket rejects bucket ending with dash -@test "sanitize_s3_bucket rejects bucket ending with dash" { - run sanitize_s3_bucket "invalid-bucket-" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid S3 bucket name"* ]] -} - -# Test 52: sanitize_s3_key accepts valid S3 key -@test "sanitize_s3_key accepts valid key" { - run sanitize_s3_key "path/to/file.json" - [ "$status" -eq 0 ] - [[ "$output" == "path/to/file.json" ]] -} - -# Test 53: sanitize_s3_key removes dangerous characters -@test "sanitize_s3_key removes dangerous characters" { - run sanitize_s3_key "path/to/file\$bad.json" - [ "$status" -eq 0 ] - [[ "$output" == "path/to/filebad.json" ]] -} - -# Test 54: sanitize_s3_key prevents path traversal -@test "sanitize_s3_key prevents path traversal" { - run sanitize_s3_key "../../../etc/passwd" - [ "$status" -eq 0 ] - [[ "$output" == "etc/passwd" ]] -} - -# Test 55: sanitize_s3_key removes multiple slashes -@test "sanitize_s3_key removes multiple slashes" { - run sanitize_s3_key "path//to///file.json" - [ "$status" -eq 0 ] - [[ "$output" == "path/to/file.json" ]] -} - -# Test 56: sanitize_s3_key removes leading slash -@test "sanitize_s3_key removes leading slash" { - run sanitize_s3_key "/path/to/file.json" - [ "$status" -eq 0 ] - [[ "$output" == "path/to/file.json" ]] -} - -# Test 57: sanitize_s3_key removes trailing slash -@test "sanitize_s3_key removes trailing slash" { - run sanitize_s3_key "path/to/file.json/" - [ "$status" -eq 0 ] - [[ "$output" == "path/to/file.json" ]] -} - -# Test 58: sanitize_s3_key rejects empty key -@test "sanitize_s3_key rejects empty key" { - run sanitize_s3_key "" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid S3 key"* ]] -} - -# Test 59: sanitize_s3_key rejects key with only invalid characters -@test "sanitize_s3_key rejects key with only invalid characters" { - run sanitize_s3_key "\$%^&*()" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid S3 key"* ]] -} - -# Test 60: sanitize_uuid accepts valid UUID -@test "sanitize_uuid accepts valid UUID" { - run sanitize_uuid "123e4567-e89b-12d3-a456-426614174000" "TEST_UUID" - [ "$status" -eq 0 ] - [[ "$output" == "123e4567-e89b-12d3-a456-426614174000" ]] -} - -# Test 61: sanitize_uuid accepts UUID without hyphens -@test "sanitize_uuid accepts UUID without hyphens" { - run sanitize_uuid "123e4567e89b12d3a456426614174000" "TEST_UUID" - [ "$status" -eq 0 ] - [[ "$output" == "123e4567e89b12d3a456426614174000" ]] -} - -# Test 62: sanitize_uuid accepts UUID with uppercase letters -@test 
"sanitize_uuid accepts uppercase UUID" { - run sanitize_uuid "123E4567-E89B-12D3-A456-426614174000" "TEST_UUID" - [ "$status" -eq 0 ] - [[ "$output" == "123E4567-E89B-12D3-A456-426614174000" ]] -} - -# Test 63: sanitize_uuid removes invalid characters -@test "sanitize_uuid removes invalid characters" { - run sanitize_uuid "123e4567-e89b-12d3-a456-426614174000!@#" "TEST_UUID" - [ "$status" -eq 0 ] - [[ "$output" == "123e4567-e89b-12d3-a456-426614174000" ]] -} - -# Test 64: sanitize_uuid rejects too short UUID -@test "sanitize_uuid rejects too short UUID" { - run sanitize_uuid "123" "TEST_UUID" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid UUID format for TEST_UUID"* ]] -} - -# Test 65: sanitize_uuid rejects non-hex characters -@test "sanitize_uuid rejects non-hex characters" { - run sanitize_uuid "123g45678-e89b-12d3-a456-426614174000" "TEST_UUID" - [ "$status" -eq 0 ] - [[ "$output" == "12345678-e89b-12d3-a456-426614174000" ]] -} - -# Test 66: sanitize_email accepts valid email -@test "sanitize_email accepts valid email" { - run sanitize_email "user@example.com" - [ "$status" -eq 0 ] - [[ "$output" == "user@example.com" ]] -} - -# Test 67: sanitize_email accepts with dots and hyphens -@test "sanitize_email accepts email with dots and hyphens" { - run sanitize_email "user.name-test@example-domain.com" - [ "$status" -eq 0 ] - [[ "$output" == "user.name-test@example-domain.com" ]] -} - -# Test 68: sanitize_email removes dangerous characters -@test "sanitize_email removes dangerous characters" { - run sanitize_email "user\$bad@example.com" - [ "$status" -eq 0 ] - [[ "$output" == "userbad@example.com" ]] -} - -# Test 69: sanitize_email rejects invalid format - no @ sign -@test "sanitize_email rejects invalid format - no @" { - run sanitize_email "invalid-email" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 70: sanitize_email rejects invalid format - multiple @ signs -@test "sanitize_email rejects invalid format - multiple @" { - run sanitize_email "user@@example.com" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 71: sanitize_email rejects invalid format - no domain -@test "sanitize_email rejects invalid format - no domain" { - run sanitize_email "user@" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 72: sanitize_email rejects invalid format - no TLD -@test "sanitize_email rejects invalid format - no TLD" { - run sanitize_email "user@domain" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid email format"* ]] -} - -# Test 73: sanitize_email accepts valid name -@test "sanitize_database_name accepts valid name" { - run sanitize_database_name "test_database" - [ "$status" -eq 0 ] - [[ "$output" == "test_database" ]] -} - -# Test 73a: sanitize_email removes newlines from input -@test "sanitize_email removes newlines from input" { - run sanitize_email "user@example.com\n" # Just a trailing newline - [ "$status" -eq 0 ] - [[ "$output" == "user@example.com" ]] - [[ "$output" != *$'\n'* ]] -} - -# Test 74: sanitize_database_name accepts name with underscores -@test "sanitize_database_name accepts name starting with underscore" { - run sanitize_database_name "_test_database" - [ "$status" -eq 0 ] - [[ "$output" == "_test_database" ]] -} - -# Test 75: sanitize_database_name accepts name with numbers -@test "sanitize_database_name accepts name with numbers" { - run sanitize_database_name "test_database_123" - [ "$status" -eq 0 ] - [[ "$output" == "test_database_123" ]] -} - -# Test 76: 
sanitize_database_name removes dangerous characters -@test "sanitize_database_name removes dangerous characters" { - run sanitize_database_name "test-database.name" - [ "$status" -eq 0 ] - [[ "$output" == "testdatabasename" ]] -} - -# Test 77: sanitize_database_name rejects name with starting with number -@test "sanitize_database_name rejects name starting with number" { - run sanitize_database_name "1test_database" - [ "$status" -eq 0 ] - [[ "$output" == "_1test_database" ]] -} - -# Test 78: sanitize_database_name rejects name with spaces -@test "sanitize_database_name rejects name with spaces" { - run sanitize_database_name "test database" - [ "$status" -eq 0 ] - [[ "$output" == "testdatabase" ]] -} - -# Test 79: sanitize_patterns accepts valid patterns -@test "sanitize_patterns accepts valid patterns" { - run sanitize_patterns "*.json,test*.txt,file.log" - [ "$status" -eq 0 ] - [[ "$output" == "*.json,test*.txt,file.log" ]] -} - -# Test 80: sanitize_patterns trims whitespace -@test "sanitize_patterns trims whitespace" { - run sanitize_patterns " *.json , test*.txt , file.log " - [ "$status" -eq 0 ] - [[ "$output" == "*.json,test*.txt,file.log" ]] -} - -# Test 81: sanitize_patterns removes dangerous characters -@test "sanitize_patterns removes dangerous characters" { - run sanitize_patterns "*.json,test\$bad.txt" - [ "$status" -eq 0 ] - [[ "$output" == "*.json,testbad.txt" ]] -} - -# Test 82: sanitize_patterns preserves valid wildcards -@test "sanitize_patterns preserves wildcards" { - run sanitize_patterns "*-prod.json,production-*.json" - [ "$status" -eq 0 ] - [[ "$output" == "*-prod.json,production-*.json" ]] -} - -# Test 83: sanitize_patterns handles empty input -@test "sanitize_patterns handles empty input" { - run sanitize_patterns "" - [ "$status" -eq 0 ] - [[ "$output" == "" ]] -} - -# Test 84: sanitize_patterns removes empty patterns -@test "sanitize_patterns removes empty patterns" { - run sanitize_patterns "*.json,,test*.txt" - [ "$status" -eq 0 ] - [[ "$output" == "*.json,test*.txt" ]] -} - -# Test 85: sanitize_patterns handles single pattern -@test "sanitize_patterns handles single pattern" { - run sanitize_patterns "*.json" - [ "$status" -eq 0 ] - [[ "$output" == "*.json" ]] -} - -# Test 86: sanitize_numeric accepts valid number -@test "sanitize_numeric accepts valid number" { - run sanitize_numeric "123" "TEST_FIELD" - [ "$status" -eq 0 ] - [[ "$output" == "123" ]] -} - -# Test 87: sanitize_numeric accepts number within range -@test "sanitize_numeric accepts number within range" { - run sanitize_numeric "50" "TEST_FIELD" 1 100 - [ "$status" -eq 0 ] - [[ "$output" == "50" ]] -} - -# Test 88: sanitize_numeric removes non-numeric characters -@test "sanitize_numeric removes non-numeric characters" { - run sanitize_numeric "1a2b3c" "TEST_FIELD" - [ "$status" -eq 0 ] - [[ "$output" == "123" ]] -} - -# Test 89: sanitize_numeric rejects non-numeric input -@test "sanitize_numeric rejects non-numeric input" { - run sanitize_numeric "abc" "TEST_FIELD" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid numeric value for TEST_FIELD"* ]] -} - -# Test 90: sanitize_numeric rejects number below minimum -@test "sanitize_numeric rejects number below minimum" { - run sanitize_numeric "5" "TEST_FIELD" 10 100 - [ "$status" -eq 1 ] - [[ "$output" == *"Numeric value for TEST_FIELD out of range"* ]] -} - -# Test 91: sanitize_numeric rejects number above maximum -@test "sanitize_numeric rejects number above maximum" { - run sanitize_numeric "150" "TEST_FIELD" 10 100 - [ "$status" -eq 1 ] - [[ 
"$output" == *"Numeric value for TEST_FIELD out of range"* ]] -} - -# Test 92: sanitize_numeric accepts boundary values -@test "sanitize_numeric accepts boundary values" { - run sanitize_numeric "10" "TEST_FIELD" 10 100 - [ "$status" -eq 0 ] - [[ "$output" == "10" ]] - - run sanitize_numeric "100" "TEST_FIELD" 10 100 - [ "$status" -eq 0 ] - [[ "$output" == "100" ]] -} - -# Test 93: sanitize_numeric rejects empty input -@test "sanitize_numeric rejects empty input" { - run sanitize_numeric "" "TEST_FIELD" - [ "$status" -eq 1 ] - [[ "$output" == *"Invalid numeric value for TEST_FIELD"* ]] -} From 5b3636db01c76d4eef6560db63bb212cdc24b7a9 Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Mon, 27 Oct 2025 14:33:19 -0500 Subject: [PATCH 2/4] chore(feature/go): License Mapper (#54) Signed-off-by: Julio Jimenez --- .golangci.yml | 2 +- internal/config/config.go | 16 +- internal/sbom/license_mapper.go | 79 +++++++ .../sbom/license_mapper_integration_test.go | 34 +++ internal/sbom/license_mapper_test.go | 221 ++++++++++++++++++ internal/storage/clickhouse.go | 25 +- 6 files changed, 368 insertions(+), 9 deletions(-) create mode 100644 internal/sbom/license_mapper.go create mode 100644 internal/sbom/license_mapper_integration_test.go create mode 100644 internal/sbom/license_mapper_test.go diff --git a/.golangci.yml b/.golangci.yml index bab89ec..f3a2665 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -20,7 +20,7 @@ linters: run: timeout: 5m - tests: true + tests: false issues: max-issues-per-linter: 0 diff --git a/internal/config/config.go b/internal/config/config.go index 6e3e7fe..0ad7994 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -54,6 +54,9 @@ type Config struct { Include string Exclude string Debug bool + + // License mapping + LicenseMappingFile string } // LoadConfig loads configuration from environment variables. 
@@ -97,12 +100,13 @@ func LoadConfig() (*Config, error) { TruncateTable: getEnvAsBool("TRUNCATE_TABLE", false), // General - SBOMSource: getEnvOrDefault("SBOM_SOURCE", "github"), - SBOMFormat: getEnvOrDefault("SBOM_FORMAT", "cyclonedx"), - Merge: getEnvAsBool("MERGE", false), - Include: os.Getenv("INCLUDE"), - Exclude: os.Getenv("EXCLUDE"), - Debug: getEnvAsBool("DEBUG", false), + SBOMSource: getEnvOrDefault("SBOM_SOURCE", "github"), + SBOMFormat: getEnvOrDefault("SBOM_FORMAT", "cyclonedx"), + Merge: getEnvAsBool("MERGE", false), + Include: os.Getenv("INCLUDE"), + Exclude: os.Getenv("EXCLUDE"), + Debug: getEnvAsBool("DEBUG", false), + LicenseMappingFile: getEnvOrDefault("LICENSE_MAPPING_FILE", "/app/license-mappings.json"), } // Sanitize inputs diff --git a/internal/sbom/license_mapper.go b/internal/sbom/license_mapper.go new file mode 100644 index 0000000..ec48a9f --- /dev/null +++ b/internal/sbom/license_mapper.go @@ -0,0 +1,79 @@ +package sbom + +import ( + "encoding/json" + "os" + + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// LicenseMapper handles mapping of unknown licenses to known licenses +type LicenseMapper struct { + mappings map[string]string +} + +// NewLicenseMapper creates a new license mapper from a JSON file +func NewLicenseMapper(mappingFile string) (*LicenseMapper, error) { + data, err := os.ReadFile(mappingFile) + if err != nil { + return nil, err + } + + var mappings map[string]string + if err := json.Unmarshal(data, &mappings); err != nil { + return nil, err + } + + logger.Debug("Loaded %d license mappings", len(mappings)) + + return &LicenseMapper{ + mappings: mappings, + }, nil +} + +// MapLicense maps an unknown license to a known one, or returns the original +func (m *LicenseMapper) MapLicense(componentName, license string) string { + // If license is already known, return it + if license != "" && license != "unknown" && license != "null" { + return license + } + + // Try to find a mapping for this component + if mapped, exists := m.mappings[componentName]; exists { + logger.Debug("Mapped license for %s: unknown -> %s", componentName, mapped) + return mapped + } + + // No mapping found, return unknown + return "unknown" +} + +// MapComponent maps the license for a component (modifies in place) +func (m *LicenseMapper) MapComponent(comp map[string]interface{}) { + name, _ := comp["name"].(string) + license, _ := comp["license"].(string) + + if name != "" { + mappedLicense := m.MapLicense(name, license) + comp["license"] = mappedLicense + } +} + +// MapComponents maps licenses for multiple components +func (m *LicenseMapper) MapComponents(components []map[string]interface{}) { + for _, comp := range components { + m.MapComponent(comp) + } +} + +// GetMapping returns the mapping for a specific component, if it exists +func (m *LicenseMapper) GetMapping(componentName string) (string, bool) { + license, exists := m.mappings[componentName] + return license, exists +} + +// HasMapping checks if a mapping exists for a component +func (m *LicenseMapper) HasMapping(componentName string) bool { + _, exists := m.mappings[componentName] + return exists +} diff --git a/internal/sbom/license_mapper_integration_test.go b/internal/sbom/license_mapper_integration_test.go new file mode 100644 index 0000000..bc85861 --- /dev/null +++ b/internal/sbom/license_mapper_integration_test.go @@ -0,0 +1,34 @@ +//go:build integration + +package sbom + +import ( + "testing" +) + +func TestLicenseMapperWithRealFile(t *testing.T) { + // Test with the actual license-mappings.json file + 
mapper, err := NewLicenseMapper("../../license-mappings.json") + if err != nil { + t.Fatalf("Failed to load real license mappings: %v", err) + } + + // Test some known mappings + tests := []struct { + component string + want string + }{ + {"4d63.com/gocheckcompilerdirectives", "MIT"}, + {"actions/cache", "MIT"}, + {"CycloneDX/gh-gomod-generate-sbom", "Apache-2.0"}, + } + + for _, tt := range tests { + t.Run(tt.component, func(t *testing.T) { + got := mapper.MapLicense(tt.component, "unknown") + if got != tt.want { + t.Errorf("MapLicense(%s) = %v, want %v", tt.component, got, tt.want) + } + }) + } +} diff --git a/internal/sbom/license_mapper_test.go b/internal/sbom/license_mapper_test.go new file mode 100644 index 0000000..8d3bcaa --- /dev/null +++ b/internal/sbom/license_mapper_test.go @@ -0,0 +1,221 @@ +package sbom + +import ( + "os" + "path/filepath" + "testing" +) + +func TestNewLicenseMapper(t *testing.T) { + // Create temp mapping file + tempDir := t.TempDir() + mappingFile := filepath.Join(tempDir, "test-mappings.json") + + mappingContent := `{ + "4d63.com/gocheckcompilerdirectives": "MIT", + "actions/cache": "MIT", + "test-component": "Apache-2.0" + }` + + if err := os.WriteFile(mappingFile, []byte(mappingContent), 0644); err != nil { + t.Fatalf("Failed to create test mapping file: %v", err) + } + + mapper, err := NewLicenseMapper(mappingFile) + if err != nil { + t.Fatalf("NewLicenseMapper() error = %v", err) + } + + if mapper == nil { + t.Fatal("Expected mapper, got nil") + } + + if len(mapper.mappings) != 3 { + t.Errorf("Expected 3 mappings, got %d", len(mapper.mappings)) + } +} + +func TestNewLicenseMapper_FileNotFound(t *testing.T) { + _, err := NewLicenseMapper("/nonexistent/file.json") + if err == nil { + t.Error("Expected error for nonexistent file, got nil") + } +} + +func TestNewLicenseMapper_InvalidJSON(t *testing.T) { + tempDir := t.TempDir() + mappingFile := filepath.Join(tempDir, "invalid.json") + + if err := os.WriteFile(mappingFile, []byte("not valid json"), 0644); err != nil { + t.Fatalf("Failed to create test file: %v", err) + } + + _, err := NewLicenseMapper(mappingFile) + if err == nil { + t.Error("Expected error for invalid JSON, got nil") + } +} + +func TestMapLicense(t *testing.T) { + mapper := &LicenseMapper{ + mappings: map[string]string{ + "test-component": "MIT", + "another-component": "Apache-2.0", + }, + } + + tests := []struct { + name string + componentName string + license string + want string + }{ + { + name: "known license - keep it", + componentName: "any-component", + license: "BSD-3-Clause", + want: "BSD-3-Clause", + }, + { + name: "unknown license with mapping", + componentName: "test-component", + license: "unknown", + want: "MIT", + }, + { + name: "empty license with mapping", + componentName: "test-component", + license: "", + want: "MIT", + }, + { + name: "null license with mapping", + componentName: "test-component", + license: "null", + want: "MIT", + }, + { + name: "unknown license without mapping", + componentName: "unmapped-component", + license: "unknown", + want: "unknown", + }, + { + name: "different component with mapping", + componentName: "another-component", + license: "", + want: "Apache-2.0", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := mapper.MapLicense(tt.componentName, tt.license) + if got != tt.want { + t.Errorf("MapLicense() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMapComponent(t *testing.T) { + mapper := &LicenseMapper{ + mappings: map[string]string{ + 
"test-component": "MIT", + }, + } + + comp := map[string]interface{}{ + "name": "test-component", + "version": "1.0.0", + "license": "unknown", + } + + mapper.MapComponent(comp) + + if comp["license"] != "MIT" { + t.Errorf("Expected license to be mapped to MIT, got %v", comp["license"]) + } +} + +func TestMapComponents(t *testing.T) { + mapper := &LicenseMapper{ + mappings: map[string]string{ + "component-a": "MIT", + "component-b": "Apache-2.0", + }, + } + + components := []map[string]interface{}{ + { + "name": "component-a", + "license": "unknown", + }, + { + "name": "component-b", + "license": "", + }, + { + "name": "component-c", + "license": "BSD-3-Clause", + }, + } + + mapper.MapComponents(components) + + // Check first component + if components[0]["license"] != "MIT" { + t.Errorf("Component A: expected MIT, got %v", components[0]["license"]) + } + + // Check second component + if components[1]["license"] != "Apache-2.0" { + t.Errorf("Component B: expected Apache-2.0, got %v", components[1]["license"]) + } + + // Check third component (should remain unchanged) + if components[2]["license"] != "BSD-3-Clause" { + t.Errorf("Component C: expected BSD-3-Clause, got %v", components[2]["license"]) + } +} + +func TestGetMapping(t *testing.T) { + mapper := &LicenseMapper{ + mappings: map[string]string{ + "test-component": "MIT", + }, + } + + t.Run("existing mapping", func(t *testing.T) { + license, exists := mapper.GetMapping("test-component") + if !exists { + t.Error("Expected mapping to exist") + } + if license != "MIT" { + t.Errorf("Expected MIT, got %v", license) + } + }) + + t.Run("non-existing mapping", func(t *testing.T) { + _, exists := mapper.GetMapping("nonexistent") + if exists { + t.Error("Expected mapping to not exist") + } + }) +} + +func TestHasMapping(t *testing.T) { + mapper := &LicenseMapper{ + mappings: map[string]string{ + "test-component": "MIT", + }, + } + + if !mapper.HasMapping("test-component") { + t.Error("Expected HasMapping to return true for test-component") + } + + if mapper.HasMapping("nonexistent") { + t.Error("Expected HasMapping to return false for nonexistent") + } +} diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go index c7a68d1..4dc2b43 100644 --- a/internal/storage/clickhouse.go +++ b/internal/storage/clickhouse.go @@ -14,6 +14,7 @@ import ( "time" "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/internal/sbom" "github.com/ClickHouse/ClickBOM/pkg/logger" ) @@ -239,6 +240,26 @@ func (c *ClickHouseClient) InsertSBOMData(ctx context.Context, sbomFile, tableNa return nil } + // Load license mapper + mapper, err := sbom.NewLicenseMapper("/app/license-mappings.json") + if err != nil { + logger.Warning("Failed to load license mappings: %v (continuing without mapping)", err) + // Continue without mapping + } else { + // Apply license mappings + logger.Info("Applying license mappings...") + for i := range components { + name := getStringField(components[i], "name", "unknown") + license := extractLicense(components[i]) + + // Map the license + mappedLicense := mapper.MapLicense(name, license) + + // Store as string field for TSV export + components[i]["license"] = mappedLicense + } + } + logger.Info("Found %d components to insert", len(components)) // Build TSV data @@ -246,7 +267,7 @@ func (c *ClickHouseClient) InsertSBOMData(ctx context.Context, sbomFile, tableNa for _, comp := range components { name := getStringField(comp, "name", "unknown") version := getStringField(comp, "version", "unknown") - 
license := extractLicense(comp) + license := getStringField(comp, "license", "unknown") source := getStringField(comp, "source", "unknown") fmt.Fprintf(&tsvData, "%s\t%s\t%s\t%s\n", name, version, license, source) @@ -289,7 +310,7 @@ func (c *ClickHouseClient) InsertSBOMData(ctx context.Context, sbomFile, tableNa return nil } -func getStringField(m map[string]interface{}, key, defaultVal string) string { +func getStringField(m map[string]interface{}, key, defaultVal string) string { //nolint:unparam if val, ok := m[key]; ok { if str, ok := val.(string); ok { return str From d9247c392fc47c00bb929fa8c2e824fc440f7e1a Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Fri, 14 Nov 2025 23:51:21 -0500 Subject: [PATCH 3/4] chore(feature/go): Trivy Integration (#55) * chore(feature/go): Trivy Integration Signed-off-by: Julio Jimenez * fix(debug): extract from wrapper function Signed-off-by: Julio Jimenez * fix(debug): extract json from zip Signed-off-by: Julio Jimenez * fix(debug): remove debug print of sbom Signed-off-by: Julio Jimenez * fix(aws): Some inputs are not longer required Signed-off-by: Julio Jimenez * fix(aws): Some inputs are not longer required Signed-off-by: Julio Jimenez * fix: add trivy to config validation Signed-off-by: Julio Jimenez * fix: add trivy to config validation Signed-off-by: Julio Jimenez * fix: add trivy to config validation Signed-off-by: Julio Jimenez * fix: ecr auth Signed-off-by: Julio Jimenez * fix: trivy clickhouse table name Signed-off-by: Julio Jimenez * feat: ability to do application scope reports Signed-off-by: Julio Jimenez * fix: i don't think org uuid is always required Signed-off-by: Julio Jimenez * fix: if no projectUuids are provided Signed-off-by: Julio Jimenez * fix: mend-project-uuids Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: maxDepthLevel Signed-off-by: Julio Jimenez * fix: stuff Signed-off-by: Julio Jimenez * feat: add merge Signed-off-by: Julio Jimenez * feat: add merge Signed-off-by: Julio Jimenez * feat: add merge Signed-off-by: Julio Jimenez * feat: add merge Signed-off-by: Julio Jimenez * fix: lint Signed-off-by: Julio Jimenez --------- Signed-off-by: Julio Jimenez --- .pre-commit-config.yaml | 2 +- Dockerfile | 12 +- action.yml | 39 ++++-- cmd/clickbom/main.go | 153 ++++++++++++++++++-- go.mod | 5 +- go.sum | 2 + internal/config/config.go | 41 ++++-- internal/config/config_test.go | 44 +++--- internal/sbom/filter.go | 85 ++++-------- internal/sbom/license_mapper.go | 2 +- internal/sbom/mend.go | 78 ++++++++--- internal/sbom/merge.go | 238 ++++++++++++++++++++++++++++++++ internal/sbom/trivy.go | 203 +++++++++++++++++++++++++++ internal/storage/s3.go | 49 +++++-- internal/storage/s3_test.go | 10 +- 15 files changed, 790 insertions(+), 173 deletions(-) create mode 100644 internal/sbom/merge.go create mode 100644 internal/sbom/trivy.go diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a338da5..97bf4c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -65,7 +65,7 @@ repos: - id: go-cyclo name: Check cyclomatic complexity - entry: gocyclo -over 25 . + entry: gocyclo -over 26 . 
language: system pass_filenames: false files: \.go$ diff --git a/Dockerfile b/Dockerfile index 19bfc09..9cc8dbf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# hadolint global ignore=DL3047,DL4001 +# hadolint global ignore=DL3047,DL4001,DL4006 # Multi-stage build for Go application FROM golang:1.25.3-alpine3.22 AS builder @@ -44,6 +44,12 @@ RUN apk add --no-cache curl unzip && \ RUN wget -O /cyclonedx "https://github.com/CycloneDX/cyclonedx-cli/releases/download/v0.27.2/cyclonedx-linux-x64" && \ chmod +x /cyclonedx +# Install Trivy +# Download the static binary directly since we're using distroless +RUN TRIVY_VERSION=$(wget -qO- "https://api.github.com/repos/aquasecurity/trivy/releases/latest" | grep '"tag_name":' | sed -E 's/.*"v([^"]+)".*/\1/') && \ + wget -qO- "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.tar.gz" | tar -xzf - -C /usr/local/bin trivy && \ + chmod +x /usr/local/bin/trivy + # Runtime stage - Distroless FROM gcr.io/distroless/static-debian12:nonroot @@ -56,6 +62,7 @@ LABEL maintainer="ClickHouse Security Team" \ COPY --from=tools /usr/local/aws-cli /usr/local/aws-cli COPY --from=tools /usr/local/bin/aws /usr/local/bin/aws COPY --from=tools /cyclonedx /usr/local/bin/cyclonedx +COPY --from=tools /usr/local/bin/trivy /usr/local/bin/trivy # Copy the binary from builder COPY --from=builder /build/clickbom /app/clickbom @@ -69,7 +76,8 @@ WORKDIR /app # distroless runs as nonroot user by default (UID 65532) # Set environment ENV PATH="/usr/local/bin:$PATH" \ - TEMP_DIR="/tmp" + TEMP_DIR="/tmp" \ + TRIVY_CACHE_DIR="/tmp/.trivy" # Run the application ENTRYPOINT ["/app/clickbom"] diff --git a/action.yml b/action.yml index df6ab3b..959a97b 100644 --- a/action.yml +++ b/action.yml @@ -1,5 +1,5 @@ name: 'ClickBOM' -description: 'Download SBOMs from GitHub, Mend, and Wiz. Convert to CycloneDX and SPDX formats. Upload to S3 and ClickHouse.' +description: 'Download SBOMs from GitHub, Mend, Wiz, and Trivy. Convert to CycloneDX and SPDX formats. Upload to S3 and ClickHouse.' author: 'ClickHouse, Inc.' 
inputs: # GitHub-specific inputs @@ -59,17 +59,25 @@ inputs: wiz-report-id: description: 'Wiz report ID to download' required: false - # AWS-specific inputs - aws-access-key-id: - description: 'AWS Access Key ID' - required: true - aws-secret-access-key: - description: 'AWS Secret Access Key' - required: true - aws-region: - description: 'AWS region' + # Trivy-specific inputs + trivy-image: + description: 'Container image to scan with Trivy for SBOM generation (format: registry/repo:tag or ECR URI)' + required: false + trivy-ecr-account-id: + description: 'AWS Account ID where ECR repository is located (for cross-account access)' + required: false + trivy-ecr-region: + description: 'AWS region where ECR repository is located' required: false default: 'us-east-1' + trivy-ecr-role-arn: + description: 'IAM role ARN to assume for ECR access (for cross-account)' + required: false + trivy-format: + description: 'Trivy SBOM output format: cyclonedx or spdxjson' + required: false + default: 'cyclonedx' + # AWS-specific inputs s3-bucket: description: 'S3 bucket name' required: true @@ -103,7 +111,7 @@ inputs: default: 'false' # General inputs sbom-source: - description: 'SBOM source: github or mend' + description: 'SBOM source: github, mend, wiz, or trivy' required: false default: 'github' sbom-format: @@ -150,10 +158,13 @@ runs: WIZ_CLIENT_ID: ${{ inputs.wiz-client-id }} WIZ_CLIENT_SECRET: ${{ inputs.wiz-client-secret }} WIZ_REPORT_ID: ${{ inputs.wiz-report-id }} + # Trivy-specific + TRIVY_IMAGE: ${{ inputs.trivy-image }} + TRIVY_ECR_ACCOUNT_ID: ${{ inputs.trivy-ecr-account-id }} + TRIVY_ECR_REGION: ${{ inputs.trivy-ecr-region }} + TRIVY_ECR_ROLE_ARN: ${{ inputs.trivy-ecr-role-arn }} + TRIVY_FORMAT: ${{ inputs.trivy-format }} # AWS-specific - AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }} - AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }} - AWS_DEFAULT_REGION: ${{ inputs.aws-region }} S3_BUCKET: ${{ inputs.s3-bucket }} S3_KEY: ${{ inputs.s3-key }} # ClickHouse-specific diff --git a/cmd/clickbom/main.go b/cmd/clickbom/main.go index c08aed8..4b004b3 100644 --- a/cmd/clickbom/main.go +++ b/cmd/clickbom/main.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "os" + "path" "path/filepath" "strings" @@ -45,7 +46,7 @@ func run() error { }() // Initialize S3 client - s3Client, err := storage.NewS3Client(ctx, cfg.AWSAccessKeyID, cfg.AWSSecretAccessKey, cfg.AWSRegion) + s3Client, err := storage.NewS3Client(ctx) if err != nil { return fmt.Errorf("failed to create S3 client: %w", err) } @@ -64,7 +65,7 @@ func handleNormalMode(ctx context.Context, cfg *config.Config, s3Client *storage extractedSBOM := filepath.Join(tempDir, "extracted_sbom.json") processedSBOM := filepath.Join(tempDir, "processed_sbom.json") - // Download SBOM based on source + // Download/Generate SBOM based on source switch cfg.SBOMSource { case "github": logger.Info("Downloading SBOM from GitHub") @@ -87,11 +88,21 @@ func handleNormalMode(ctx context.Context, cfg *config.Config, s3Client *storage return fmt.Errorf("failed to download Wiz SBOM: %w", err) } + case "trivy": + logger.Info("Generating SBOM with Trivy") + trivyClient, err := sbom.NewTrivyClient(ctx, cfg) + if err != nil { + return fmt.Errorf("failed to create Trivy client: %w", err) + } + if err := trivyClient.GenerateSBOM(ctx, originalSBOM); err != nil { + return fmt.Errorf("failed to generate SBOM with Trivy: %w", err) + } + default: return fmt.Errorf("unsupported SBOM source: %s", cfg.SBOMSource) } - // Extract from wrapper if needed + // Extract SBOM from wrapper if 
needed (mainly for GitHub) if err := sbom.ExtractSBOMFromWrapper(originalSBOM, extractedSBOM); err != nil { return fmt.Errorf("failed to extract SBOM: %w", err) } @@ -103,9 +114,9 @@ func handleNormalMode(ctx context.Context, cfg *config.Config, s3Client *storage } logger.Info("Detected SBOM format: %s", detectedFormat) - // Convert to desired format - targetFormat := sbom.Format(cfg.SBOMFormat) - if err := sbom.ConvertSBOM(extractedSBOM, processedSBOM, detectedFormat, targetFormat); err != nil { + // Convert to desired format if needed + desiredFormat := sbom.Format(cfg.SBOMFormat) + if err := sbom.ConvertSBOM(extractedSBOM, processedSBOM, detectedFormat, desiredFormat); err != nil { return fmt.Errorf("failed to convert SBOM: %w", err) } @@ -115,28 +126,133 @@ func handleNormalMode(ctx context.Context, cfg *config.Config, s3Client *storage } logger.Success("SBOM processing completed successfully!") - logger.Info("SBOM available at: s3://%s/%s", cfg.S3Bucket, cfg.S3Key) - // ClickHouse operations + // ClickHouse upload if configured if cfg.ClickHouseURL != "" { - if err := handleClickHouse(ctx, cfg, processedSBOM); err != nil { - return fmt.Errorf("ClickHouse error: %w", err) + logger.Info("Uploading SBOM data to ClickHouse") + + chClient, err := storage.NewClickHouseClient(cfg) + if err != nil { + return fmt.Errorf("failed to create ClickHouse client: %w", err) + } + + tableName := generateTableName(cfg) + + if err := chClient.SetupTable(ctx, tableName); err != nil { + return fmt.Errorf("failed to setup table: %w", err) } + + if err := chClient.InsertSBOMData(ctx, processedSBOM, tableName, cfg.SBOMFormat); err != nil { + return fmt.Errorf("failed to upload to ClickHouse: %w", err) + } + + logger.Success("ClickHouse operations completed successfully!") } return nil } -func handleMergeMode(_ context.Context, _ *config.Config, _ *storage.S3Client, _ string) error { +func handleMergeMode(ctx context.Context, cfg *config.Config, s3Client *storage.S3Client, tempDir string) error { logger.Info("Running in MERGE mode - merging all CycloneDX SBOMs from S3") - // Implementation for merge mode... 
- // This would involve downloading all SBOMs from S3, merging them, and uploading + // Create download directory + downloadDir := filepath.Join(tempDir, "downloads") + if err := os.MkdirAll(downloadDir, 0755); err != nil { + return fmt.Errorf("failed to create download directory: %w", err) + } + + // Download all files from S3 + downloadedFiles, err := s3Client.DownloadAll(ctx, cfg.S3Bucket, "", downloadDir) + if err != nil { + return fmt.Errorf("failed to download files from S3: %w", err) + } + + logger.Info("Downloaded %d files from S3", len(downloadedFiles)) + + if len(downloadedFiles) == 0 { + return fmt.Errorf("no files found in S3 bucket: %s", cfg.S3Bucket) + } + + // Filter and validate CycloneDX SBOMs + cyclonedxFiles := make([]string, 0) + + for _, file := range downloadedFiles { + filename := filepath.Base(file) + + // Apply include/exclude filters + if !sbom.ShouldIncludeFile(filename, cfg.Include, cfg.Exclude) { + logger.Debug("Skipping %s due to include/exclude filters", filename) + continue + } + + // Check if file is valid CycloneDX + format, err := sbom.DetectSBOMFormat(file) + if err != nil { + logger.Warning("Failed to detect format for %s: %v", filename, err) + continue + } + + if format != sbom.FormatCycloneDX { + logger.Debug("Skipping %s: not CycloneDX format (detected: %s)", filename, format) + continue + } + + cyclonedxFiles = append(cyclonedxFiles, file) + logger.Debug("Added %s to merge list", filename) + } + + logger.Info("Found %d valid CycloneDX SBOMs to merge", len(cyclonedxFiles)) + + if len(cyclonedxFiles) == 0 { + return fmt.Errorf("no valid CycloneDX SBOMs found after filtering") + } + + // Merge all SBOMs + mergedSBOM := filepath.Join(tempDir, "merged_sbom.json") + if err := sbom.MergeSBOMs(cyclonedxFiles, mergedSBOM); err != nil { + return fmt.Errorf("failed to merge SBOMs: %w", err) + } + + // Convert to desired format if needed + finalSBOM := filepath.Join(tempDir, "final_sbom.json") + desiredFormat := sbom.Format(cfg.SBOMFormat) + if err := sbom.ConvertSBOM(mergedSBOM, finalSBOM, sbom.FormatCycloneDX, desiredFormat); err != nil { + return fmt.Errorf("failed to convert merged SBOM: %w", err) + } + + // Upload merged SBOM back to S3 + if err := s3Client.Upload(ctx, finalSBOM, cfg.S3Bucket, cfg.S3Key, cfg.SBOMFormat); err != nil { + return fmt.Errorf("failed to upload merged SBOM: %w", err) + } + + logger.Success("SBOM merging and upload completed successfully!") + + // ClickHouse upload if configured + if cfg.ClickHouseURL != "" { + logger.Info("Uploading merged SBOM data to ClickHouse") + + chClient, err := storage.NewClickHouseClient(cfg) + if err != nil { + return fmt.Errorf("failed to create ClickHouse client: %w", err) + } + + tableName := generateTableName(cfg) + + if err := chClient.SetupTable(ctx, tableName); err != nil { + return fmt.Errorf("failed to setup table: %w", err) + } + + if err := chClient.InsertSBOMData(ctx, finalSBOM, tableName, cfg.SBOMFormat); err != nil { + return fmt.Errorf("failed to upload to ClickHouse: %w", err) + } + + logger.Success("ClickHouse operations completed successfully!") + } return nil } -func handleClickHouse(ctx context.Context, cfg *config.Config, sbomFile string) error { +func handleClickHouse(ctx context.Context, cfg *config.Config, sbomFile string) error { // nolint: unused logger.Info("Starting ClickHouse operations") chClient, err := storage.NewClickHouseClient(cfg) @@ -159,6 +275,10 @@ func handleClickHouse(ctx context.Context, cfg *config.Config, sbomFile string) } func generateTableName(cfg 
*config.Config) string { + if cfg.Merge { + replacer := strings.NewReplacer(".", "_", "-", "_") + return fmt.Sprintf("merged_%s", replacer.Replace(cfg.S3Key)) + } switch cfg.SBOMSource { case "github": return strings.ReplaceAll(strings.ToLower(cfg.Repository), "/", "_") @@ -170,6 +290,11 @@ func generateTableName(cfg *config.Config) string { return fmt.Sprintf("mend_%s", strings.ReplaceAll(uuid, "-", "_")) case "wiz": return fmt.Sprintf("wiz_%s", strings.ReplaceAll(cfg.WizReportID, "-", "_")) + case "trivy": + result := path.Base(cfg.TrivyImage) + replacer := strings.NewReplacer(":", "_", ".", "_", "-", "_") + result = replacer.Replace(result) + return fmt.Sprintf("trivy_%s", result) default: return "sbom_data" } diff --git a/go.mod b/go.mod index 4e8896c..bd0f578 100644 --- a/go.mod +++ b/go.mod @@ -5,12 +5,14 @@ go 1.25.3 require ( github.com/aws/aws-sdk-go-v2 v1.39.4 github.com/aws/aws-sdk-go-v2/config v1.31.15 - github.com/aws/aws-sdk-go-v2/credentials v1.18.19 github.com/aws/aws-sdk-go-v2/service/s3 v1.88.7 + github.com/aws/aws-sdk-go-v2/service/sts v1.38.9 + github.com/google/uuid v1.6.0 ) require ( github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.18.19 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.11 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.11 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.11 // indirect @@ -22,6 +24,5 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.11 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.29.8 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.3 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.38.9 // indirect github.com/aws/smithy-go v1.23.1 // indirect ) diff --git a/go.sum b/go.sum index 6e4e000..bc312b0 100644 --- a/go.sum +++ b/go.sum @@ -34,3 +34,5 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.38.9 h1:Ekml5vGg6sHSZLZJQJagefnVe6Pm github.com/aws/aws-sdk-go-v2/service/sts v1.38.9/go.mod h1:/e15V+o1zFHWdH3u7lpI3rVBcxszktIKuHKCY2/py+k= github.com/aws/smithy-go v1.23.1 h1:sLvcH6dfAFwGkHLZ7dGiYF7aK6mg4CgKA/iDKjLDt9M= github.com/aws/smithy-go v1.23.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= diff --git a/internal/config/config.go b/internal/config/config.go index 0ad7994..6bdc61a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -33,6 +33,14 @@ type Config struct { WizClientSecret string WizReportID string + // Trivy + TrivyImage string + TrivyECRAccountID string + TrivyECRRegion string + TrivyECRRoleARN string + TrivyECRExternalID string + TrivyFormat string + // AWS AWSAccessKeyID string AWSSecretAccessKey string @@ -63,11 +71,8 @@ type Config struct { func LoadConfig() (*Config, error) { cfg := &Config{ // AWS (required) - AWSAccessKeyID: os.Getenv("AWS_ACCESS_KEY_ID"), - AWSSecretAccessKey: os.Getenv("AWS_SECRET_ACCESS_KEY"), - AWSRegion: getEnvOrDefault("AWS_DEFAULT_REGION", "us-east-1"), - S3Bucket: os.Getenv("S3_BUCKET"), - S3Key: getEnvOrDefault("S3_KEY", "sbom.json"), + S3Bucket: os.Getenv("S3_BUCKET"), + S3Key: getEnvOrDefault("S3_KEY", "sbom.json"), // GitHub GitHubToken: os.Getenv("GITHUB_TOKEN"), @@ -92,6 +97,14 @@ func LoadConfig() (*Config, error) { WizClientSecret: os.Getenv("WIZ_CLIENT_SECRET"), WizReportID: os.Getenv("WIZ_REPORT_ID"), + // Trivy + 
TrivyImage: getEnvOrDefault("TRIVY_IMAGE", ""), + TrivyECRAccountID: getEnvOrDefault("TRIVY_ECR_ACCOUNT_ID", ""), + TrivyECRRegion: getEnvOrDefault("TRIVY_ECR_REGION", "us-east-1"), + TrivyECRRoleARN: getEnvOrDefault("TRIVY_ECR_ROLE_ARN", ""), + TrivyECRExternalID: getEnvOrDefault("TRIVY_ECR_EXTERNAL_ID", ""), + TrivyFormat: getEnvOrDefault("TRIVY_FORMAT", "cyclonedx"), + // ClickHouse ClickHouseURL: os.Getenv("CLICKHOUSE_URL"), ClickHouseDatabase: getEnvOrDefault("CLICKHOUSE_DATABASE", "default"), @@ -125,18 +138,12 @@ func LoadConfig() (*Config, error) { // Validate checks that all required configuration fields are set appropriately. func (c *Config) Validate() error { // AWS is always required - if c.AWSAccessKeyID == "" { - return fmt.Errorf("AWS_ACCESS_KEY_ID is required") - } - if c.AWSSecretAccessKey == "" { - return fmt.Errorf("AWS_SECRET_ACCESS_KEY is required") - } if c.S3Bucket == "" { return fmt.Errorf("S3_BUCKET is required") } // Repository required if not in merge mode and source is GitHub - if !c.Merge && c.SBOMSource != "mend" && c.SBOMSource != "wiz" { + if !c.Merge && c.SBOMSource != "mend" && c.SBOMSource != "wiz" && c.SBOMSource != "trivy" { if c.Repository == "" { return fmt.Errorf("REPOSITORY is required when not in merge mode") } @@ -174,6 +181,16 @@ func (c *Config) Validate() error { } } + // Trivy validation + if c.SBOMSource == "trivy" { + if c.TrivyImage == "" { + return fmt.Errorf("TRIVY_IMAGE is required for Trivy source") + } + if c.TrivyFormat != "cyclonedx" && c.TrivyFormat != "spdxjson" { + return fmt.Errorf("TRIVY_FORMAT must be 'cyclonedx' or 'spdxjson'") + } + } + // ClickHouse validation if c.ClickHouseURL != "" { if c.ClickHouseDatabase == "" { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index bb617cd..72fa434 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -14,19 +14,15 @@ func TestLoadConfig(t *testing.T) { { name: "valid minimal config", env: map[string]string{ - "AWS_ACCESS_KEY_ID": "test-key", - "AWS_SECRET_ACCESS_KEY": "test-secret", - "S3_BUCKET": "test-bucket", - "REPOSITORY": "owner/repo", + "S3_BUCKET": "test-bucket", + "REPOSITORY": "owner/repo", }, wantErr: false, }, { name: "missing required field", env: map[string]string{ - "AWS_ACCESS_KEY_ID": "test-key", - // Missing AWS_SECRET_ACCESS_KEY - "S3_BUCKET": "test-bucket", + // Missing S3_BUCKET "REPOSITORY": "owner/repo", }, wantErr: true, @@ -34,10 +30,8 @@ func TestLoadConfig(t *testing.T) { { name: "invalid repository format", env: map[string]string{ - "AWS_ACCESS_KEY_ID": "test-key", - "AWS_SECRET_ACCESS_KEY": "test-secret", - "S3_BUCKET": "test-bucket", - "REPOSITORY": "invalid-repo", // No slash + "S3_BUCKET": "test-bucket", + "REPOSITORY": "invalid-repo", // No slash }, wantErr: true, }, @@ -79,35 +73,29 @@ func TestConfigValidate(t *testing.T) { { name: "valid github config", config: &Config{ - AWSAccessKeyID: "key", - AWSSecretAccessKey: "secret", - S3Bucket: "bucket", - Repository: "owner/repo", - SBOMSource: "github", + S3Bucket: "bucket", + Repository: "owner/repo", + SBOMSource: "github", }, wantErr: false, }, { name: "valid mend config", config: &Config{ - AWSAccessKeyID: "key", - AWSSecretAccessKey: "secret", - S3Bucket: "bucket", - SBOMSource: "mend", - MendEmail: "test@example.com", - MendOrgUUID: "123e4567-e89b-12d3-a456-426614174000", - MendUserKey: "user-key", - MendProjectUUID: "123e4567-e89b-12d3-a456-426614174001", + S3Bucket: "bucket", + SBOMSource: "mend", + MendEmail: "test@example.com", + 
MendOrgUUID: "123e4567-e89b-12d3-a456-426614174000", + MendUserKey: "user-key", + MendProjectUUID: "123e4567-e89b-12d3-a456-426614174001", }, wantErr: false, }, { name: "invalid mend config - missing email", config: &Config{ - AWSAccessKeyID: "key", - AWSSecretAccessKey: "secret", - S3Bucket: "bucket", - SBOMSource: "mend", + S3Bucket: "bucket", + SBOMSource: "mend", // Missing MendEmail MendOrgUUID: "123e4567-e89b-12d3-a456-426614174000", MendUserKey: "user-key", diff --git a/internal/sbom/filter.go b/internal/sbom/filter.go index 7dda0c5..cbb5483 100644 --- a/internal/sbom/filter.go +++ b/internal/sbom/filter.go @@ -1,4 +1,4 @@ -// Package sbom provides functionalities for filtering files for SBOM generation. +// Package sbom provides pattern matching for filtering files. package sbom import ( @@ -8,87 +8,56 @@ import ( "github.com/ClickHouse/ClickBOM/pkg/logger" ) -// FileFilter defines inclusion and exclusion patterns for filtering files. -type FileFilter struct { - Include []string - Exclude []string -} - -// NewFileFilter creates a new FileFilter with the given include and exclude patterns. -func NewFileFilter(include, exclude string) *FileFilter { - return &FileFilter{ - Include: parsePatterns(include), - Exclude: parsePatterns(exclude), - } -} - -func parsePatterns(patterns string) []string { +// MatchesPattern checks if a filename matches any pattern in a comma-separated list. +func MatchesPattern(filename, patterns string) bool { if patterns == "" { - return nil + return false } - parts := strings.Split(patterns, ",") - var result []string - for _, p := range parts { - p = strings.TrimSpace(p) - if p != "" { - result = append(result, p) - } - } - return result -} + // Split patterns by comma + patternList := strings.Split(patterns, ",") -// MatchesPattern checks if the filename matches any of the provided patterns. -func (f *FileFilter) MatchesPattern(filename string, patterns []string) bool { - if len(patterns) == 0 { - return false - } + for _, pattern := range patternList { + // Trim whitespace + pattern = strings.TrimSpace(pattern) - for _, pattern := range patterns { + if pattern == "" { + continue + } + + // Use filepath.Match for wildcard matching matched, err := filepath.Match(pattern, filename) if err != nil { logger.Warning("Invalid pattern %s: %v", pattern, err) continue } + if matched { + logger.Debug("File %s matches pattern %s", filename, pattern) return true } } + return false } -// ShouldInclude determines if a file should be included based on the filter rules. -func (f *FileFilter) ShouldInclude(filename string) bool { - // If include patterns specified, file must match at least one - if len(f.Include) > 0 { - if !f.MatchesPattern(filename, f.Include) { +// ShouldIncludeFile determines if a file should be included based on include/exclude patterns. 
+func ShouldIncludeFile(filename, includePatterns, excludePatterns string) bool { + // If include patterns are specified, file must match at least one + if includePatterns != "" { + if !MatchesPattern(filename, includePatterns) { + logger.Debug("File %s does not match include patterns", filename) return false } } - // If exclude patterns specified and file matches, exclude it - if len(f.Exclude) > 0 { - if f.MatchesPattern(filename, f.Exclude) { + // If exclude patterns are specified, file must not match any + if excludePatterns != "" { + if MatchesPattern(filename, excludePatterns) { + logger.Debug("File %s matches exclude patterns", filename) return false } } return true } - -// FilterFiles filters the given list of files based on the FileFilter rules. -func (f *FileFilter) FilterFiles(files []string) []string { - var filtered []string - - for _, file := range files { - filename := filepath.Base(file) - if f.ShouldInclude(filename) { - filtered = append(filtered, file) - } else { - logger.Debug("Filtered out: %s", filename) - } - } - - logger.Info("Filtered %d files to %d files", len(files), len(filtered)) - return filtered -} diff --git a/internal/sbom/license_mapper.go b/internal/sbom/license_mapper.go index ec48a9f..17cd0d7 100644 --- a/internal/sbom/license_mapper.go +++ b/internal/sbom/license_mapper.go @@ -34,7 +34,7 @@ func NewLicenseMapper(mappingFile string) (*LicenseMapper, error) { // MapLicense maps an unknown license to a known one, or returns the original func (m *LicenseMapper) MapLicense(componentName, license string) string { // If license is already known, return it - if license != "" && license != "unknown" && license != "null" { + if license != "" && license != "unknown" && license != "null" { // nolint:goconst return license } diff --git a/internal/sbom/mend.go b/internal/sbom/mend.go index 946dafd..67d4896 100644 --- a/internal/sbom/mend.go +++ b/internal/sbom/mend.go @@ -2,6 +2,7 @@ package sbom import ( + "archive/zip" "bytes" "context" "encoding/json" @@ -172,17 +173,22 @@ func (m *MendClient) RequestSBOMExport(ctx context.Context, outputFile string) e } // Add scope + var url string switch { case m.projectUUID != "": payload["scopeType"] = "project" payload["scopeUuid"] = m.projectUUID - uuids := strings.Split(m.projectUUIDs, ",") - payload["projectUuids"] = uuids + // uuids := strings.Split(m.projectUUIDs, ",") + // payload["projectUuids"] = uuids + url = fmt.Sprintf("%s/api/v3.0/projects/%s/dependencies/reports/SBOM", m.baseURL, m.projectUUID) case m.productUUID != "": - payload["scopeType"] = "product" - payload["scopeUuid"] = m.productUUID - uuids := strings.Split(m.projectUUIDs, ",") - payload["projectUuids"] = uuids + // if len(m.projectUUIDs) != 0 { + // uuids := strings.Split(m.projectUUIDs, ",") + // payload["projectUuids"] = uuids + // } + payload["projectUuids"] = []string{m.projectUUID} + payload["maxDepthLevel"] = 0 + url = fmt.Sprintf("%s/api/v3.0/applications/%s/dependencies/reports/SBOM", m.baseURL, m.productUUID) case m.orgScopeUUID != "": payload["scopeType"] = "organization" payload["scopeUuid"] = m.orgScopeUUID @@ -193,9 +199,6 @@ func (m *MendClient) RequestSBOMExport(ctx context.Context, outputFile string) e return fmt.Errorf("failed to marshal payload: %w", err) } - url := fmt.Sprintf("%s/api/v3.0/projects/%s/dependencies/reports/SBOM", - m.baseURL, m.projectUUID) - req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payloadBytes)) if err != nil { return fmt.Errorf("failed to create request: %w", err) @@ -360,23 
+363,54 @@ func (m *MendClient) downloadReport(ctx context.Context, reportUUID, outputFile return fmt.Errorf("download failed (status %d): %s", resp.StatusCode, string(body)) } - // Create output file - outFile, err := os.Create(outputFile) + // After getting the response from Mend API + body, err := io.ReadAll(resp.Body) if err != nil { - return fmt.Errorf("failed to create output file: %w", err) + return fmt.Errorf("failed to read response: %w", err) } - defer func() { - if err := outFile.Close(); err != nil { - logger.Warning("Failed to close file: %v", err) + + // Check if response is a ZIP file (starts with "PK") + if len(body) >= 2 && body[0] == 0x50 && body[1] == 0x4B { + // It's a ZIP file, extract it + zipReader, err := zip.NewReader(bytes.NewReader(body), int64(len(body))) + if err != nil { + return fmt.Errorf("failed to read ZIP: %w", err) } - }() - // Copy response to file - written, err := io.Copy(outFile, resp.Body) - if err != nil { - return fmt.Errorf("failed to write file: %w", err) + // Find and read the JSON file inside + for _, file := range zipReader.File { + if strings.HasSuffix(file.Name, ".json") { + rc, err := file.Open() + if err != nil { + return fmt.Errorf("failed to open file in ZIP: %w", err) + } + body, err = io.ReadAll(rc) + if err != nil { + return fmt.Errorf("failed to read file in ZIP: %w", err) + } + err = rc.Close() + if err != nil { + return fmt.Errorf("failed to close file in ZIP: %w", err) + } + // Create output file + outFile, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create output file: %w", err) + } + defer func() { + if err := outFile.Close(); err != nil { + logger.Warning("Failed to close file: %v", err) + } + }() + // Copy response to file + written, err := io.Copy(outFile, bytes.NewReader(body)) + if err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + logger.Success("Mend SBOM downloaded successfully (%d bytes)", written) + break + } + } } - - logger.Success("Mend SBOM downloaded successfully (%d bytes)", written) return nil } diff --git a/internal/sbom/merge.go b/internal/sbom/merge.go new file mode 100644 index 0000000..da2c6f1 --- /dev/null +++ b/internal/sbom/merge.go @@ -0,0 +1,238 @@ +// Package sbom provides functionalities for merging multiple SBOMs. +package sbom + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/google/uuid" + + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// MergedComponent represents a component with source tracking. +type MergedComponent struct { + Component map[string]interface{} + Source string +} + +// ExtractSourceReference extracts the source reference from an SBOM file. 
+func ExtractSourceReference(sbomFile string) (string, error) { + data, err := os.ReadFile(sbomFile) + if err != nil { + return "", fmt.Errorf("failed to read SBOM file: %w", err) + } + + var sbom map[string]interface{} + if err := json.Unmarshal(data, &sbom); err != nil { + return "", fmt.Errorf("failed to parse SBOM: %w", err) + } + + filename := filepath.Base(sbomFile) + filename = strings.TrimSuffix(filename, filepath.Ext(filename)) + + // Strategy 1: Check for spdx:document:name in properties (GitHub SBOMs) + if metadata, ok := sbom["metadata"].(map[string]interface{}); ok { + if properties, ok := metadata["properties"].([]interface{}); ok { + for _, prop := range properties { + if propMap, ok := prop.(map[string]interface{}); ok { + if name, _ := propMap["name"].(string); name == "spdx:document:name" { + if value, ok := propMap["value"].(string); ok && value != "" { + logger.Debug("Found SPDX document name: %s", value) + return value, nil + } + } + } + } + } + + // Strategy 2: Check metadata.component.name (Wiz/Mend SBOMs) + if component, ok := metadata["component"].(map[string]interface{}); ok { + if name, ok := component["name"].(string); ok && name != "" { + logger.Debug("Found component name: %s", name) + return name, nil + } + + // Strategy 3: Check metadata.component.bom-ref + if bomRef, ok := component["bom-ref"].(string); ok && bomRef != "" { + logger.Debug("Found bom-ref: %s", bomRef) + return bomRef, nil + } + } + } + + // Strategy 4: Check top-level name field + if name, ok := sbom["name"].(string); ok && name != "" { + logger.Debug("Found top-level name: %s", name) + return name, nil + } + + // Strategy 5: Use filename without extension + logger.Debug("Using fallback name: %s", filename) + return filename, nil +} + +// CollectComponentsWithSource extracts components from an SBOM and adds source tracking. +func CollectComponentsWithSource(sbomFile, sourceRef string) ([]map[string]interface{}, error) { + data, err := os.ReadFile(sbomFile) + if err != nil { + return nil, fmt.Errorf("failed to read SBOM file: %w", err) + } + + var sbom map[string]interface{} + if err := json.Unmarshal(data, &sbom); err != nil { + return nil, fmt.Errorf("failed to parse SBOM: %w", err) + } + + components, ok := sbom["components"].([]interface{}) + if !ok { + return []map[string]interface{}{}, nil + } + + result := make([]map[string]interface{}, 0, len(components)) + for _, comp := range components { + if compMap, ok := comp.(map[string]interface{}); ok { + // Add source tracking + compMap["source"] = sourceRef + result = append(result, compMap) + } + } + + logger.Debug("Collected %d components with source: %s", len(result), sourceRef) + return result, nil +} + +// DeduplicateComponents removes duplicate components based on name+version+purl+source. 
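+// The deduplication key has the form "name@version#purl^source" (missing fields
+// default to "unknown"), so the same package reported by two different source SBOMs
+// is kept once per source rather than collapsed into a single entry.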
+func DeduplicateComponents(components []map[string]interface{}) []map[string]interface{} { + seen := make(map[string]bool) + unique := make([]map[string]interface{}, 0) + + for _, comp := range components { + name, _ := comp["name"].(string) + if name == "" { + name = "unknown" // nolint:goconst + } + + version, _ := comp["version"].(string) + if version == "" { + version = "unknown" + } + + purl, _ := comp["purl"].(string) + + source, _ := comp["source"].(string) + if source == "" { + source = "unknown" + } + + // Create unique key + key := fmt.Sprintf("%s@%s#%s^%s", name, version, purl, source) + + if !seen[key] { + seen[key] = true + unique = append(unique, comp) + } + } + + logger.Info("Deduplicated %d components down to %d unique components", len(components), len(unique)) + return unique +} + +// MergeSBOMs merges multiple CycloneDX SBOMs into one with source tracking. +func MergeSBOMs(inputFiles []string, outputFile string) error { + logger.Info("Merging %d CycloneDX SBOMs with source tracking", len(inputFiles)) + + if len(inputFiles) == 0 { + return fmt.Errorf("no input files provided") + } + + // Create merged SBOM metadata + timestamp := time.Now().UTC().Format("2006-01-02T15:04:05Z") + serialNumber := fmt.Sprintf("urn:uuid:%s", uuid.New().String()) + + mergedSBOM := map[string]interface{}{ + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": serialNumber, + "version": 1, + "metadata": map[string]interface{}{ + "timestamp": timestamp, + "tools": []map[string]interface{}{ + { + "vendor": "ClickBOM", + "name": "cyclonedx-merge", + "version": "2.0.0", + }, + }, + "component": map[string]interface{}{ + "type": "application", + "name": "merged-sbom", + "version": "1.0.0", + }, + }, + "components": []map[string]interface{}{}, + } + + // Collect all components with source tracking + allComponents := make([]map[string]interface{}, 0) + + for _, sbomFile := range inputFiles { + sourceRef, err := ExtractSourceReference(sbomFile) + if err != nil { + logger.Warning("Failed to extract source reference from %s: %v", filepath.Base(sbomFile), err) + sourceRef = filepath.Base(sbomFile) + } + + components, err := CollectComponentsWithSource(sbomFile, sourceRef) + if err != nil { + logger.Warning("Failed to collect components from %s: %v", filepath.Base(sbomFile), err) + continue + } + + logger.Info("Processing %s: %d components (source: %s)", + filepath.Base(sbomFile), len(components), sourceRef) + + allComponents = append(allComponents, components...) 
+ } + + // Deduplicate components + uniqueComponents := DeduplicateComponents(allComponents) + mergedSBOM["components"] = uniqueComponents + + // Write merged SBOM to file + data, err := json.MarshalIndent(mergedSBOM, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal merged SBOM: %w", err) + } + + if err := os.WriteFile(outputFile, data, 0644); err != nil { + return fmt.Errorf("failed to write merged SBOM: %w", err) + } + + logger.Success("Successfully merged %d SBOMs into one with %d unique components", + len(inputFiles), len(uniqueComponents)) + + // Log summary + logger.Info("Merge summary with source tracking:") + for _, sbomFile := range inputFiles { + sourceRef, _ := ExtractSourceReference(sbomFile) + data, _ := os.ReadFile(sbomFile) + var sbom map[string]interface{} + err := json.Unmarshal(data, &sbom) + if err != nil { + continue + } + compCount := 0 + if components, ok := sbom["components"].([]interface{}); ok { + compCount = len(components) + } + logger.Info(" - %s: %d components (source: %s)", + strings.TrimSuffix(filepath.Base(sbomFile), ".json"), compCount, sourceRef) + } + + return nil +} diff --git a/internal/sbom/trivy.go b/internal/sbom/trivy.go new file mode 100644 index 0000000..08c652b --- /dev/null +++ b/internal/sbom/trivy.go @@ -0,0 +1,203 @@ +// Package sbom provides functionalities to interact with Trivy for SBOM generation. +package sbom + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/sts" + + cfg "github.com/ClickHouse/ClickBOM/internal/config" + "github.com/ClickHouse/ClickBOM/pkg/logger" +) + +// TrivyClient handles interactions with Trivy for SBOM generation from container images. +type TrivyClient struct { + image string + ecrAccountID string + ecrRegion string + ecrRoleARN string + externalID string + format string // "cyclonedx" or "spdxjson" + awsConfig aws.Config +} + +// NewTrivyClient creates a new TrivyClient with the provided configuration. +func NewTrivyClient(ctx context.Context, c *cfg.Config) (*TrivyClient, error) { + // Load default AWS config + awsConfig, err := config.LoadDefaultConfig(ctx) + if err != nil { + return nil, fmt.Errorf("failed to load AWS config: %w", err) + } + + return &TrivyClient{ + image: c.TrivyImage, + ecrAccountID: c.TrivyECRAccountID, + ecrRegion: c.TrivyECRRegion, + ecrRoleARN: c.TrivyECRRoleARN, + externalID: c.TrivyECRExternalID, + format: c.TrivyFormat, + awsConfig: awsConfig, + }, nil +} + +// setupECRCredentials sets up AWS credentials for ECR access, supporting cross-account. 
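+// When an ECR role ARN is configured, the role is assumed via STS (using the external
+// ID if provided) and the temporary credentials are exported through AWS_ACCESS_KEY_ID,
+// AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN and AWS_REGION so the spawned trivy process
+// can authenticate to ECR; otherwise only AWS_REGION is set and the ambient credentials
+// are reused.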
+func (t *TrivyClient) setupECRCredentials(ctx context.Context) error { + logger.Info("Setting up ECR credentials...") + + // If cross-account role is specified, assume the role + if t.ecrRoleARN != "" { + logger.Info("Using cross-account role: %s", t.ecrRoleARN) + + // Create STS client with original credentials + stsClient := sts.NewFromConfig(t.awsConfig) + + // Build AssumeRole input + assumeRoleInput := &sts.AssumeRoleInput{ + RoleArn: aws.String(t.ecrRoleARN), + RoleSessionName: aws.String(fmt.Sprintf("trivy-sbom-gen-%d", time.Now().Unix())), + } + + // Add External ID if provided + if t.externalID != "" { + assumeRoleInput.ExternalId = aws.String(t.externalID) + logger.Info("Using external ID for role assumption") + } + + // Assume the role + assumeRoleOutput, err := stsClient.AssumeRole(ctx, assumeRoleInput) + if err != nil { + return fmt.Errorf("failed to assume role %s: %w", t.ecrRoleARN, err) + } + + logger.Success("Successfully assumed cross-account role") + + // Set environment variables with assumed role credentials + err = os.Setenv("AWS_ACCESS_KEY_ID", *assumeRoleOutput.Credentials.AccessKeyId) + if err != nil { + return fmt.Errorf("failed to set AWS_ACCESS_KEY_ID: %w", err) + } + err = os.Setenv("AWS_SECRET_ACCESS_KEY", *assumeRoleOutput.Credentials.SecretAccessKey) + if err != nil { + return fmt.Errorf("failed to set AWS_SECRET_ACCESS_KEY: %w", err) + } + err = os.Setenv("AWS_SESSION_TOKEN", *assumeRoleOutput.Credentials.SessionToken) + if err != nil { + return fmt.Errorf("failed to set AWS_SESSION_TOKEN: %w", err) + } + err = os.Setenv("AWS_REGION", t.ecrRegion) + if err != nil { + return fmt.Errorf("failed to set AWS_REGION: %w", err) + } + + logger.Debug("ECR credentials set in environment variables") + } else if t.ecrRegion != "" { + err := os.Setenv("AWS_REGION", t.ecrRegion) + if err != nil { + return fmt.Errorf("failed to set AWS_REGION: %w", err) + } + logger.Info("Using current AWS credentials with region: %s", t.ecrRegion) + } + + logger.Success("ECR credentials configured for Trivy") + return nil +} + +// GenerateSBOM generates an SBOM from the container image using Trivy. +func (t *TrivyClient) GenerateSBOM(ctx context.Context, outputFile string) error { + logger.Info("Generating SBOM for image: %s", t.image) + logger.Info("SBOM format: %s", t.format) + logger.Info("Using remote image source (no download)") + + // Check if this is an ECR image + isECRImage := strings.Contains(t.image, ".dkr.ecr.") && strings.Contains(t.image, ".amazonaws.com/") + + // Set up ECR credentials if needed + // Trivy supports ECR authentication natively without Docker! 
+	if isECRImage && t.ecrAccountID != "" {
+		if err := t.setupECRCredentials(ctx); err != nil {
+			return fmt.Errorf("failed to setup ECR credentials: %w", err)
+		}
+	}
+
+	// Create temp file for raw Trivy output
+	tempDir := filepath.Dir(outputFile)
+	trivyOutputFile := filepath.Join(tempDir, "trivy_sbom_output.json")
+
+	logger.Info("Running Trivy SBOM generation...")
+
+	// Determine Trivy output format
+	var trivyFormat string
+	switch t.format {
+	case "cyclonedx": // nolint: goconst
+		trivyFormat = "cyclonedx"
+	case "spdxjson":
+		trivyFormat = "spdx-json"
+	default:
+		return fmt.Errorf("unsupported SBOM format: %s", t.format)
+	}
+
+	// Build Trivy command for SBOM generation
+	// Using --image-src remote to scan at source without downloading
+	args := []string{
+		"image",
+		"--format", trivyFormat,
+		"--output", trivyOutputFile,
+		"--image-src", "remote",
+		"--quiet",
+		t.image,
+	}
+
+	cmd := exec.CommandContext(ctx, "trivy", args...)
+
+	// Trivy will use AWS credentials from environment for ECR access
+	cmd.Env = os.Environ()
+
+	logger.Debug("Executing: trivy %s", strings.Join(args, " "))
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("trivy SBOM generation failed: %w\nOutput: %s", err, string(output))
+	}
+
+	logger.Success("Trivy SBOM generation completed successfully")
+
+	// Validate the output
+	trivyData, err := os.ReadFile(trivyOutputFile)
+	if err != nil {
+		return fmt.Errorf("failed to read Trivy output: %w", err)
+	}
+
+	var sbomData map[string]interface{}
+	if err := json.Unmarshal(trivyData, &sbomData); err != nil {
+		return fmt.Errorf("trivy output is not valid JSON: %w", err)
+	}
+
+	// Move to final output location
+	if err := os.Rename(trivyOutputFile, outputFile); err != nil {
+		return fmt.Errorf("failed to move SBOM file: %w", err)
+	}
+
+	logger.Info("SBOM saved to: %s", outputFile)
+
+	// Log component count based on format
+	switch t.format {
+	case "cyclonedx":
+		if components, ok := sbomData["components"].([]interface{}); ok {
+			logger.Info("Total components found: %d", len(components))
+		}
+	case "spdxjson":
+		if packages, ok := sbomData["packages"].([]interface{}); ok {
+			logger.Info("Total packages found: %d", len(packages))
+		}
+	}
+	return nil
+}
diff --git a/internal/storage/s3.go b/internal/storage/s3.go
index 69cc5d1..a26a684 100644
--- a/internal/storage/s3.go
+++ b/internal/storage/s3.go
@@ -6,10 +6,13 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"path/filepath"
+	"strings"
 
 	"github.com/aws/aws-sdk-go-v2/aws"
 	"github.com/aws/aws-sdk-go-v2/config"
-	"github.com/aws/aws-sdk-go-v2/credentials"
+
+	// "github.com/aws/aws-sdk-go-v2/credentials"
 	"github.com/aws/aws-sdk-go-v2/service/s3"
 
 	"github.com/ClickHouse/ClickBOM/pkg/logger"
@@ -21,15 +24,8 @@ type S3Client struct {
 }
 
-// NewS3Client creates a new S3Client with the provided AWS credentials and region.
+// NewS3Client creates a new S3Client using the default AWS credential chain and region.
-func NewS3Client(ctx context.Context, accessKeyID, secretAccessKey, region string) (*S3Client, error) { - cfg, err := config.LoadDefaultConfig(ctx, - config.WithRegion(region), - config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( - accessKeyID, - secretAccessKey, - "", - )), - ) +func NewS3Client(ctx context.Context) (*S3Client, error) { + cfg, err := config.LoadDefaultConfig(ctx) if err != nil { return nil, fmt.Errorf("failed to load AWS config: %w", err) } @@ -134,3 +130,36 @@ func (s *S3Client) ListObjects(ctx context.Context, bucket, prefix string) ([]st logger.Info("Found %d objects in S3", len(keys)) return keys, nil } + +// DownloadAll downloads all files from S3 bucket to local directory. +func (s *S3Client) DownloadAll(ctx context.Context, bucket, prefix, localDir string) ([]string, error) { + logger.Info("Downloading all files from s3://%s/%s", bucket, prefix) + + // List all objects + keys, err := s.ListObjects(ctx, bucket, prefix) + if err != nil { + return nil, err + } + + downloadedFiles := make([]string, 0) + + for _, key := range keys { + // Skip directories (keys ending with /) + if strings.HasSuffix(key, "/") { + continue + } + + filename := filepath.Base(key) + localPath := filepath.Join(localDir, filename) + + if err := s.Download(ctx, bucket, key, localPath); err != nil { + logger.Warning("Failed to download %s: %v", key, err) + continue + } + + downloadedFiles = append(downloadedFiles, localPath) + } + + logger.Info("Downloaded %d files", len(downloadedFiles)) + return downloadedFiles, nil +} diff --git a/internal/storage/s3_test.go b/internal/storage/s3_test.go index 7ced2bb..c96a1d4 100644 --- a/internal/storage/s3_test.go +++ b/internal/storage/s3_test.go @@ -1,6 +1,3 @@ -//go:build integration -// +build integration - package storage import ( @@ -18,12 +15,7 @@ func TestS3Upload(t *testing.T) { ctx := context.Background() // Create S3 client - s3Client, err := storage.NewS3Client( - ctx, - "test", - "test", - "us-east-1", - ) + s3Client, err := NewS3Client(ctx) if err != nil { t.Fatalf("Failed to create S3 client: %v", err) } From 9c5d8e8025179eae0a814e1caf2b4340d09ddb29 Mon Sep 17 00:00:00 2001 From: Julio Jimenez Date: Sun, 14 Dec 2025 21:47:48 -0500 Subject: [PATCH 4/4] feat(go): License Updates and Test Migrations Signed-off-by: Julio Jimenez --- .github/workflows/tests.yml | 2 +- license-mappings.json | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3a20194..5d06dc0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: steps: - name: ๐Ÿงพ Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: ๐Ÿ”ง Setup Go uses: actions/setup-go@v5 diff --git a/license-mappings.json b/license-mappings.json index ee759e0..cdd9e44 100644 --- a/license-mappings.json +++ b/license-mappings.json @@ -7,9 +7,13 @@ "@clickhouse/client": "Apache-2.0", "@clickhouse/client-common": "Apache-2.0", "@faker-js/faker": "MIT", + "@hyperdx/common-utils-0.0.0-use.local.tgz": "MIT", + "@hyperdx/common-utils-0.7.2.tgz": "MIT", + "@hyperdx/passport-local-mongoose-9.0.1.tgz": "MIT", "@istanbuljs/nyc-config-typescript": "ISC", "@shikijs/core": "MIT", "@shikijs/engine-javascript": "MIT", + "@types/hyperdx__lucene-2.1.7.tgz": "MIT", "@types/jasmine": "MIT", "@types/jsonwebtoken": "MIT", "@types/node": "MIT",