diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..ac9dab3 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# Global code owners - all files require review from @bold-minds +* @bold-minds diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..00c0e34 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,36 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '[BUG] ' +labels: 'bug' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Code Example** +If applicable, add a minimal code example to help explain your problem. + +```go +// Your code here +``` + +**Environment (please complete the following information):** + - OS: [e.g. Linux, macOS, Windows] + - Go version: [e.g. 1.22.0] + - Library version: [e.g. v1.0.0] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..759a68a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,31 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '[FEATURE] ' +labels: 'enhancement' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. 
+ +**Proposed API (if applicable)** +If this feature would involve API changes, please provide a code example of how it would work: + +```go +// Your proposed API here +``` + +**Additional context** +Add any other context or screenshots about the feature request here. + +**Backward Compatibility** +- [ ] This feature maintains backward compatibility +- [ ] This feature introduces breaking changes (please explain above) diff --git a/.github/badges/coverage.json b/.github/badges/coverage.json new file mode 100644 index 0000000..6c71cd8 --- /dev/null +++ b/.github/badges/coverage.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"coverage","message":"pending","color":"lightgrey"} diff --git a/.github/badges/dependabot.json b/.github/badges/dependabot.json new file mode 100644 index 0000000..eeb03fc --- /dev/null +++ b/.github/badges/dependabot.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"security","message":"pending","color":"lightgrey"} diff --git a/.github/badges/go-version.json b/.github/badges/go-version.json new file mode 100644 index 0000000..d8d6269 --- /dev/null +++ b/.github/badges/go-version.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"Go","message":"pending","color":"lightgrey"} diff --git a/.github/badges/golangci-lint.json b/.github/badges/golangci-lint.json new file mode 100644 index 0000000..3cd7817 --- /dev/null +++ b/.github/badges/golangci-lint.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"golangci-lint","message":"pending","color":"lightgrey"} diff --git a/.github/badges/last-updated.json b/.github/badges/last-updated.json new file mode 100644 index 0000000..7c63ee8 --- /dev/null +++ b/.github/badges/last-updated.json @@ -0,0 +1 @@ +{"schemaVersion":1,"label":"last updated","message":"pending","color":"lightgrey"} diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..9206d73 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,42 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" 
+ schedule: + interval: "weekly" + day: "monday" + time: "09:00" + timezone: "America/Los_Angeles" + open-pull-requests-limit: 5 + assignees: + - "bold-minds" + commit-message: + prefix: "deps" + include: "scope" + labels: + - "dependencies" + - "go" + groups: + go-dependencies: + patterns: + - "*" + update-types: + - "minor" + - "patch" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "monday" + time: "09:00" + timezone: "America/Los_Angeles" + open-pull-requests-limit: 5 + assignees: + - "bold-minds" + commit-message: + prefix: "ci" + include: "scope" + labels: + - "dependencies" + - "github-actions" diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..8f26c32 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,171 @@ +name: test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +permissions: + contents: write + pull-requests: read + security-events: read + actions: read + +jobs: + test: + strategy: + matrix: + include: + # Primary testing: Latest Go on all platforms (time/concurrency testing) + - go-version: '1.24' + os: ubuntu-latest + - go-version: '1.24' + os: windows-latest + - go-version: '1.24' + os: macos-latest + # Compatibility testing: Min supported Go on Linux only + - go-version: '1.22' + os: ubuntu-latest + # Intermediate compatibility: Go 1.23 on Linux only + - go-version: '1.23' + os: ubuntu-latest + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + - name: Install dependencies + run: go mod download + + # Build validation (cross-platform) + - name: Build validation + run: | + go build ./... 
+ go mod tidy + git diff --exit-code go.mod go.sum + + # Unit tests with race detection + - name: Run tests (Unix) + if: runner.os != 'Windows' + run: go test -v -race -coverprofile=coverage.out ./... + - name: Run tests (Windows) + if: runner.os == 'Windows' + run: go test -v -coverprofile="coverage.out" ./... + - name: Run benchmarks + run: go test -bench=. -benchmem ./... + + # Generate badge data from CI metrics (only from primary job) + - name: Generate badge data + if: github.ref == 'refs/heads/main' && matrix.os == 'ubuntu-latest' && matrix.go-version == '1.24' + env: + GH_TOKEN: ${{ github.token }} + run: | + mkdir -p .github/badges + + # Install golangci-lint v2 directly (due to third-party action restrictions) + go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest + + # Generate coverage badge from existing coverage.out + if [[ -f "coverage.out" ]]; then + COVERAGE=$(go test -coverprofile=temp_coverage.out ./. 2>/dev/null | grep "coverage:" | grep -oE '[0-9]+\.[0-9]+%' | sed 's/%//' | head -1) + rm -f temp_coverage.out 2>/dev/null + + if [[ -z "$COVERAGE" ]]; then + COVERAGE=$(go tool cover -func=coverage.out | grep total | awk '{print $3}' | sed 's/%//') + fi + + if (( $(echo "$COVERAGE >= 80" | bc -l) )); then + echo '{"schemaVersion":1,"label":"coverage","message":"'$COVERAGE'%","color":"brightgreen"}' > .github/badges/coverage.json + elif (( $(echo "$COVERAGE >= 60" | bc -l) )); then + echo '{"schemaVersion":1,"label":"coverage","message":"'$COVERAGE'%","color":"yellow"}' > .github/badges/coverage.json + else + echo '{"schemaVersion":1,"label":"coverage","message":"'$COVERAGE'%","color":"red"}' > .github/badges/coverage.json + fi + fi + + # Generate Go version badge + GO_VERSION=$(go version | grep -oE 'go[0-9]+\.[0-9]+(\.[0-9]+)?' 
| head -1) + echo '{"schemaVersion":1,"label":"Go","message":"'$GO_VERSION'","color":"00ADD8"}' > .github/badges/go-version.json + + # Generate last updated badge + LAST_COMMIT_DATE=$(git log -1 --format=%cd --date=short) + echo '{"schemaVersion":1,"label":"last updated","message":"'$LAST_COMMIT_DATE'","color":"teal"}' > .github/badges/last-updated.json + + # Generate golangci-lint badge (v2 doesn't support --out-format, use exit code) + if golangci-lint run; then + echo '{"schemaVersion":1,"label":"golangci-lint","message":"0 issues","color":"brightgreen"}' > .github/badges/golangci-lint.json + else + # Count issues by running again; grep -c prints 0 itself on no match, so only its exit status is suppressed + ISSUES=$(golangci-lint run 2>&1 | grep -c "^.*\.go:" || true) + if [[ $ISSUES -eq 0 ]]; then + echo '{"schemaVersion":1,"label":"golangci-lint","message":"passing","color":"brightgreen"}' > .github/badges/golangci-lint.json + else + echo '{"schemaVersion":1,"label":"golangci-lint","message":"'$ISSUES' issues","color":"red"}' > .github/badges/golangci-lint.json + fi + fi + + # Generate comprehensive security badge (Dependabot + Code Scanning) + echo "🔍 Checking security alerts..." + DEPENDABOT_ALERTS=$(gh api repos/${GITHUB_REPOSITORY}/dependabot/alerts --jq 'length' 2>/dev/null || echo "0") + echo "Dependabot alerts: $DEPENDABOT_ALERTS" + + # Debug code scanning API access + echo "🔍 Checking code scanning alerts..."
+ gh api repos/${GITHUB_REPOSITORY}/code-scanning/alerts 2>&1 | head -5 || echo "Code scanning API failed" + CODE_SCANNING_ALERTS=$(gh api repos/${GITHUB_REPOSITORY}/code-scanning/alerts --jq '[.[] | select(.state == "open")] | length' 2>/dev/null || echo "0") + echo "Code scanning alerts: $CODE_SCANNING_ALERTS" + + TOTAL_ALERTS=$((DEPENDABOT_ALERTS + CODE_SCANNING_ALERTS)) + OPEN_PRS=$(gh pr list --author "app/dependabot" --state open --json number --jq 'length' 2>/dev/null || echo "0") + echo "Total alerts: $TOTAL_ALERTS, Open PRs: $OPEN_PRS" + + if [[ $TOTAL_ALERTS -gt 0 ]]; then + if [[ $DEPENDABOT_ALERTS -gt 0 && $CODE_SCANNING_ALERTS -gt 0 ]]; then + echo '{"schemaVersion":1,"label":"security","message":"'$TOTAL_ALERTS' alerts","color":"red"}' > .github/badges/dependabot.json + elif [[ $DEPENDABOT_ALERTS -gt 0 ]]; then + echo '{"schemaVersion":1,"label":"security","message":"'$DEPENDABOT_ALERTS' dependency alerts","color":"red"}' > .github/badges/dependabot.json + else + echo '{"schemaVersion":1,"label":"security","message":"'$CODE_SCANNING_ALERTS' code alerts","color":"red"}' > .github/badges/dependabot.json + fi + elif [[ $OPEN_PRS -gt 0 ]]; then + echo '{"schemaVersion":1,"label":"dependabot","message":"'$OPEN_PRS' updates","color":"blue"}' > .github/badges/dependabot.json + else + echo '{"schemaVersion":1,"label":"security","message":"all clear","color":"brightgreen"}' > .github/badges/dependabot.json + fi + + - name: Generate GitHub App Token for Badge Commits + if: github.ref == 'refs/heads/main' && matrix.os == 'ubuntu-latest' && matrix.go-version == '1.24' + id: app-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ vars.BADGE_BOT_APP_ID }} + private-key: ${{ secrets.BADGE_BOT_PRIVATE_KEY }} + + - name: Commit badges to main branch + if: github.ref == 'refs/heads/main' && matrix.os == 'ubuntu-latest' && matrix.go-version == '1.24' + env: + GITHUB_TOKEN: ${{ steps.app-token.outputs.token }} + run: | + # Configure git with GitHub App identity and
authentication + git config --global user.name "Badge Automation Bot" + git config --global user.email "action@github.com" + + # Configure git to use the GitHub App token for authentication + git config --global url."https://x-access-token:${GITHUB_TOKEN}@github.com/".insteadOf "https://github.com/" + + # Add badge files to git + git add .github/badges/ + + # Commit if there are changes + if git diff --staged --quiet; then + echo "No badge changes to commit" + else + git commit -m "chore: update badges from CI run ${{ github.run_number }} [skip ci]" + # Push with GitHub App token as GITHUB_TOKEN (standard pattern for repository ruleset bypass) + git push origin HEAD:main + fi diff --git a/.gitignore b/.gitignore index aaadf73..34fadf4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -# If you prefer the allow list template instead of the deny list, see community template: -# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore -# # Binaries for programs and plugins *.exe *.exe~ @@ -11,22 +8,43 @@ # Test binary, built with `go test -c` *.test -# Code coverage profiles and other test artifacts +# Output of the go coverage tool, specifically when used with LiteIDE *.out -coverage.* -*.coverprofile -profile.cov + +# Badge files are now committed to main branch for direct access # Dependency directories (remove the comment below to include it) # vendor/ # Go workspace file go.work -go.work.sum -# env file -.env +# IDE files +.idea/ +*.swp +*.swo +*~ + +# OS generated files +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Coverage reports +coverage.out +coverage.html + +# Build artifacts +dist/ +build/ + +# Temporary files +*.tmp +*.temp -# Editor/IDE -# .idea/ -# .vscode/ +# Log files +*.log diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..54dff85 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,134 @@ +version: "2" +run: + relative-path-mode: wd +linters: + default: none + enable: + - depguard + - errcheck + - godox + - gosec + - govet + - ineffassign + - staticcheck + - unused + settings: + cyclop: + max-complexity: 30 + package-average: 10 + depguard: + rules: + main: + files: + - $all + allow: + - $gostd + - github.com/bold-minds/obs + - go.opentelemetry.io/otel + - go.opentelemetry.io/otel/attribute + - go.opentelemetry.io/otel/codes + - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp + - go.opentelemetry.io/otel/sdk + - go.opentelemetry.io/otel/sdk/resource + - go.opentelemetry.io/otel/sdk/trace + - go.opentelemetry.io/otel/semconv/v1.21.0 + - go.opentelemetry.io/otel/trace + - go.opentelemetry.io/otel/trace/noop + errcheck: + check-type-assertions: true + funlen: + lines: 100 + statements: 50 + ignore-comments: true + gocognit: + min-complexity: 20 + gochecksumtype: + default-signifies-exhaustive: false + gocritic: + settings: + captLocal: + paramsOnly: false + underef: + skipRecvDeref: false + govet: + disable: + - fieldalignment + enable-all: true + settings: + shadow: + strict: true + inamedparam: + skip-single-param: true + mnd: + ignored-functions: + - args.Error + - flag.Arg + - flag.Duration.* + - flag.Float.* + - flag.Int.* + - flag.Uint.* + - os.Chmod + - os.Mkdir.* + - os.OpenFile + - os.WriteFile + - prometheus.ExponentialBuckets.* + - prometheus.LinearBuckets + nakedret: + max-func-lines: 0 + nolintlint: + require-explanation: true + require-specific: true + allow-no-explanation: + - funlen + - gocognit + - lll + perfsprint: + strconcat: false + reassign: + 
patterns: + - .* + rowserrcheck: + packages: + - github.com/jmoiron/sqlx + sloglint: + no-global: all + context: scope + usetesting: + os-temp-dir: true + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling + rules: + - linters: + - godot + source: (noinspection|TODO) + - linters: + - gocritic + source: //noinspection + - linters: + - bodyclose + - dupl + - errcheck + - funlen + - goconst + - gosec + - noctx + - wrapcheck + path: _test\.go + paths: + - third_party$ + - builtin$ + - examples$ +issues: + max-same-issues: 50 +formatters: + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..d5d897e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement through GitHub +issues or direct contact with project maintainers. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..b469bf2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,118 @@ +# Contributing + +Thank you for your interest in contributing! We welcome contributions that improve the library while maintaining its focus on simplicity, performance, and Go idioms. + +## Getting Started + +### Prerequisites + +- **Go 1.22+** +- **Git** +- **golangci-lint** (optional, for comprehensive linting) + +### Development Setup + +1. **Fork and clone the repository**: + ```bash + git clone https://github.com/YOUR_USERNAME/obs.git + cd obs + ``` + +2. **Run tests**: + ```bash + go test -race ./... 
+ ``` + +## What We're Looking For + +### Encouraged + +- **Bug fixes** — fix issues or edge cases +- **Performance improvements** — optimize without breaking compatibility +- **Test enhancements** — add test cases, improve coverage +- **Documentation improvements** — clarify usage, add examples + +### Requires Discussion First + +- **API changes** — modifications to public interfaces +- **New dependencies** — adding external packages +- **Breaking changes** — changes that affect backward compatibility + +### Not Accepted + +- **Feature creep** — complex features that don't align with Go idioms +- **Non-idiomatic Go** — code that doesn't follow Go conventions +- **Performance regressions** — changes that significantly slow down the library + +## Contribution Process + +### 1. Create an Issue First + +For significant changes, please create an issue to discuss: +- What problem you're solving +- Your proposed approach +- Any potential breaking changes + +### 2. Development Workflow + +1. **Create a feature branch**: + ```bash + git checkout -b feature/your-feature-name + ``` + +2. **Make your changes** — follow the code style guidelines below, add tests, update documentation as needed. + +3. **Validate your changes**: + ```bash + go fmt ./... + go vet ./... + go test -race ./... + ``` + +4. **Commit your changes**: + ```bash + git commit -m "feat: add your feature description" + ``` + +5. **Push and create a pull request**: + ```bash + git push origin feature/your-feature-name + ``` + +### 3. 
Pull Request Guidelines + +Your PR should: +- Have a clear title describing the change +- Reference any related issues using `Fixes #123` or `Closes #123` +- Include tests for new functionality +- Pass all CI checks +- Maintain backward compatibility unless discussed otherwise + +## Code Style + +- Follow standard Go formatting (`go fmt`) +- Use meaningful variable and function names +- Write clear, concise comments for public APIs +- Follow Go's error handling patterns +- Write table-driven tests where appropriate +- Test both success and error cases +- Include edge cases (nil values, empty strings, etc.) +- Run tests with `-race` to ensure thread safety + +## Commit Messages + +We follow conventional commits: + +``` +type(scope): description +``` + +Types: `feat`, `fix`, `docs`, `test`, `refactor`, `perf`, `chore` + +## Code Review + +We look for: correctness, performance, style, tests, documentation, and backward compatibility. Initial review within 2-3 business days. + +## License + +By contributing, you agree that your contributions will be licensed under the same license that covers the project. diff --git a/README.md b/README.md index 0411508..8c819d6 100644 --- a/README.md +++ b/README.md @@ -1 +1,212 @@ -# obs \ No newline at end of file +# obs + +A powerful Go observability library that combines intelligent sampling, business-aware tracking, and seamless OpenTelemetry integration. + +## Features + +🎯 **Intelligent Sampling** +- Context-aware sampling based on operation type, error status, and endpoint volume +- Configurable sampling rates for different operation types (database, user events, errors) +- High-volume endpoint detection with reduced sampling rates + +📊 **Generic Event System** +- Flexible, domain-agnostic event tracking for any business context +- Pre-built event builders for common scenarios (data ops, queries, errors, etc.)
+- Rich context attributes with type-safe attribute builders +- Backward-compatible legacy functions for existing codebases + +🔧 **Multiple Abstraction Levels** +- Low-level span management for fine-grained control +- High-level `TrackedOperation` for concise business logic tracing +- Middleware-friendly API design + +🚀 **Production Ready** +- Comprehensive configuration with validation +- Environment variable support +- Graceful shutdown and error handling +- Memory and performance metrics + +## Quick Start + +### Installation + +```bash +go get github.com/bold-minds/obs +``` + +### Basic Usage + +```go +package main + +import ( + "context" + "log" + + "github.com/bold-minds/obs" +) + +func main() { + ctx := context.Background() + + // Initialize from environment variables + if err := obs.InitFromEnv(ctx); err != nil { + log.Fatal(err) + } + defer obs.Shutdown(ctx) + + // Track a business operation + ctx, op := obs.TrackOperation(ctx, "api", "user_signup") + defer op.Success(1) + + // Your business logic here + processUserSignup() +} +``` + +### Configuration + +Set environment variables: + +```bash +export OBS_ENABLED=true +export OBS_API_KEY=your_honeycomb_api_key +export OBS_DATASET=your_dataset +export OBS_SERVICE_NAME=your_service +export OBS_ENVIRONMENT=production +``` + +Or configure programmatically: + +```go +cfg := obs.Config{ + Enabled: true, + APIKey: "your_api_key", + Dataset: "your_dataset", + ServiceName: "your_service", + Environment: "production", + Sampling: obs.SamplingConfig{ + TraceSampleRate: 25, // 25% of traces + ErrorSampleRate: 100, // 100% of errors + DatabaseSampleRate: 10, // 10% of DB operations + UserEventSampleRate: 50, // 50% of user events + HighVolumeEndpoints: []string{"Find", "List"}, + HighVolumeSampleRate: 5, // 5% for high-volume endpoints + }, +} + +if err := obs.Init(ctx, cfg); err != nil { + log.Fatal(err) +} +``` + +## Usage Examples + +### Business Tracking + +```go +// Track data operations 
+obs.TrackDataOperationResult(ctx, "save", "tenant_123", 5, 100, true, false) + +// Track user activity +obs.TrackUserActivity(ctx, "api_call", "tenant_123", 1024, 150, true) + +// Track query performance +obs.TrackQueryPerformance(ctx, "find_users", 45, 25, true, false) +``` + +### Advanced Tracing + +```go +// Manual span management +ctx, span := obs.StartSpan(ctx, "business", "complex_operation") +defer span.End() + +obs.AddBusinessContext(span, "tenant_123", 5, "data_migration") +obs.AddMemoryMetrics(span) + +// Tracked operations with error handling +ctx, op := obs.TrackOperationWithError(ctx, "critical", "payment_processing") +if err := processPayment(); err != nil { + op.Error(err, "payment_failed") + return +} +op.Success(1) +``` + +### Database Operations + +```go +ctx, span := obs.TrackDatabaseOperation(ctx, "query", "find_users", 512) +defer func() { + obs.FinishSpanWithResult(span, time.Since(start), resultCount, err) +}() + +// Execute your database query +results, err := db.Query("MATCH (u:User) RETURN u") +``` + +## Migration from Original Code + +This library maintains 100% API compatibility with the original `observe` package. Simply update your imports: + +```go +// Before +import "your_project/observe" + +// After +import "github.com/bold-minds/obs" +``` + +All function signatures and behavior remain identical, ensuring zero migration friction. 
+ +## Configuration Reference + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `OBS_ENABLED` | Enable/disable observability | `false` | +| `OBS_API_KEY` | Honeycomb API key | Required when enabled | +| `OBS_DATASET` | Honeycomb dataset | `default` | +| `OBS_SERVICE_NAME` | Service name | `service` | +| `OBS_ENVIRONMENT` | Environment (dev/staging/prod) | `development` | +| `OBS_TRACE_SAMPLE_RATE` | Base trace sampling rate (1-100) | `100` | +| `OBS_ERROR_SAMPLE_RATE` | Error sampling rate (1-100) | `100` | +| `OBS_DATABASE_SAMPLE_RATE` | Database operation sampling (1-100) | `100` | +| `OBS_USER_EVENT_SAMPLE_RATE` | User event sampling (1-100) | `100` | +| `OBS_HIGH_VOLUME_ENDPOINTS` | Comma-separated list of endpoints | `Find,Save` | +| `OBS_HIGH_VOLUME_SAMPLE_RATE` | Sampling rate for high-volume endpoints | `25` | + +### Sampling Strategy + +The library uses intelligent sampling to manage event volume: + +1. **Error Priority**: Errors are sampled at higher rates to ensure visibility +2. **Operation Type**: Different rates for database, user events, and general operations +3. **High-Volume Detection**: Reduced sampling for read-heavy endpoints +4. **Context Awareness**: Sampling decisions consider operation context + +## Architecture + +``` +obs/ +├── config/ # Configuration management +├── sampling/ # Intelligent sampling logic +├── tracing/ # Core tracing utilities +├── business/ # Business domain tracking +├── providers/ # Backend integrations (Honeycomb) +├── obs.go # Main client API +└── observe.go # Compatibility layer +``` + +## Contributing + +1. Fork the repository +2. Create a feature branch +3. Add tests for new functionality +4. Submit a pull request + +## License + +MIT License - see LICENSE file for details. 
\ No newline at end of file diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..d2bb630 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,58 @@ +# Security Policy + +## Supported Versions + +Only the **latest released minor version** receives security patches. + +| Version | Supported | +| ------- | ------------------ | +| latest | :white_check_mark: | +| older | :x: | + +## Reporting a Vulnerability + +**Do not open a public GitHub issue for security problems.** + +### 1. Report Privately + +Report via **[GitHub Security Advisories](https://github.com/bold-minds/obs/security/advisories/new)**. This creates a confidential channel between you and the maintainers. + +If the Security Advisories flow is unavailable, email **security@bold-minds.com**. + +### 2. What to Include + +- A description of the issue and its impact +- Steps to reproduce or a proof-of-concept +- The version affected +- Your Go version and OS, if relevant +- Any suggested mitigation + +### 3. Response Timeline + +- **Initial acknowledgement**: within 48 hours +- **Triage + severity assessment**: within 7 days +- **Resolution**: varies based on complexity, typically within 30 days + +You will be credited in the release notes unless you request otherwise. + +### 4. Disclosure Process + +1. We acknowledge receipt of your vulnerability report +2. We investigate and validate the vulnerability +3. We develop and test a fix +4. We coordinate disclosure timing with you +5. We release a security update +6. We publicly acknowledge your responsible disclosure (if desired) + +## Security Updates + +Security updates will be: + +- Released as patch versions +- Documented in CHANGELOG.md +- Announced through GitHub releases +- Tagged with security labels + +## Acknowledgments + +We appreciate responsible disclosure and will acknowledge security researchers who help improve the security of this project. 
diff --git a/business/events.go b/business/events.go new file mode 100644 index 0000000..3efb9fb --- /dev/null +++ b/business/events.go @@ -0,0 +1,155 @@ +package business + +import ( + "context" + "fmt" + + "go.opentelemetry.io/otel/attribute" + + "github.com/bold-minds/obs/tracing" +) + +// Event represents a generic business event with flexible attributes +type Event struct { + Name string + Category string + Domain string + Attributes map[string]interface{} +} + +// NewEvent creates a new business event +func NewEvent(name, category, domain string) *Event { + return &Event{ + Name: name, + Category: category, + Domain: domain, + Attributes: make(map[string]interface{}), + } +} + +// WithString adds a string attribute to the event +func (e *Event) WithString(key, value string) *Event { + e.Attributes[key] = value + return e +} + +// WithInt adds an integer attribute to the event +func (e *Event) WithInt(key string, value int) *Event { + e.Attributes[key] = value + return e +} + +// WithInt64 adds an int64 attribute to the event +func (e *Event) WithInt64(key string, value int64) *Event { + e.Attributes[key] = value + return e +} + +// WithFloat64 adds a float64 attribute to the event +func (e *Event) WithFloat64(key string, value float64) *Event { + e.Attributes[key] = value + return e +} + +// WithBool adds a boolean attribute to the event +func (e *Event) WithBool(key string, value bool) *Event { + e.Attributes[key] = value + return e +} + +// WithAttributes adds multiple attributes at once +func (e *Event) WithAttributes(attrs map[string]interface{}) *Event { + for k, v := range attrs { + e.Attributes[k] = v + } + return e +} + +// Track records the business event as a span +func (e *Event) Track(ctx context.Context) { + _, span := tracing.StartSpan(ctx, "business", e.Name) + defer span.End() + + // Convert attributes to OpenTelemetry attributes + attrs := make([]attribute.KeyValue, 0, len(e.Attributes)+3) + + // Add standard attributes + attrs = 
append(attrs, + attribute.String("business.event", e.Name), + attribute.String("business.category", e.Category), + attribute.String("business.domain", e.Domain), + ) + + // Add custom attributes + for key, value := range e.Attributes { + switch v := value.(type) { + case string: + attrs = append(attrs, attribute.String(key, v)) + case int: + attrs = append(attrs, attribute.Int(key, v)) + case int64: + attrs = append(attrs, attribute.Int64(key, v)) + case float64: + attrs = append(attrs, attribute.Float64(key, v)) + case bool: + attrs = append(attrs, attribute.Bool(key, v)) + default: + // Fallback to string representation + attrs = append(attrs, attribute.String(key, fmt.Sprintf("%v", v))) + } + } + + span.SetAttributes(attrs...) +} + +// TrackEvent is a convenience function for creating and tracking an event in one call +func TrackEvent(ctx context.Context, name, category, domain string, attributes map[string]interface{}) { + event := NewEvent(name, category, domain).WithAttributes(attributes) + event.Track(ctx) +} + +// Common event builders for typical business scenarios + +// DataOperationEvent creates an event for data operations +func DataOperationEvent(operation string, success bool, recordsAffected int) *Event { + return NewEvent("data_operation", "data_management", "records"). + WithString("operation", operation). + WithBool("success", success). + WithInt("records_affected", recordsAffected) +} + +// UserActivityEvent creates an event for user activity tracking +func UserActivityEvent(operation string, success bool) *Event { + return NewEvent("user_activity", "usage_analytics", "user"). + WithString("operation", operation). + WithBool("success", success) +} + +// QueryPerformanceEvent creates an event for query performance tracking +func QueryPerformanceEvent(queryName string, durationMs int64, resultCount int, success bool, cached bool) *Event { + return NewEvent("query_performance", "performance_monitoring", "database"). 
+ WithString("query_name", queryName). + WithInt64("duration_ms", durationMs). + WithInt("result_count", resultCount). + WithBool("success", success). + WithBool("cached", cached) +} + +// ErrorPatternEvent creates an event for error pattern tracking +func ErrorPatternEvent(errorType, errorCode, operation, component string, frequency int) *Event { + return NewEvent("error_pattern", "error_analysis", "reliability"). + WithString("error_type", errorType). + WithString("error_code", errorCode). + WithString("operation", operation). + WithString("component", component). + WithInt("frequency", frequency) +} + +// ResourceUsageEvent creates an event for system resource tracking +func ResourceUsageEvent(operation string, memoryMB, cpuPercent float64, connectionCount int) *Event { + return NewEvent("resource_usage", "resource_monitoring", "infrastructure"). + WithString("operation", operation). + WithFloat64("memory_mb", memoryMB). + WithFloat64("cpu_percent", cpuPercent). + WithInt("connection_count", connectionCount) +} diff --git a/business/events_test.go b/business/events_test.go new file mode 100644 index 0000000..7ac2d56 --- /dev/null +++ b/business/events_test.go @@ -0,0 +1,248 @@ +package business + +import ( + "context" + "testing" +) + +// TestNewEvent verifies that NewEvent initializes Name, Category, Domain, and Attributes map +func TestNewEvent(t *testing.T) { + name := "test_event" + category := "test_category" + domain := "test_domain" + + event := NewEvent(name, category, domain) + + if event.Name != name { + t.Errorf("expected Name %q, got %q", name, event.Name) + } + if event.Category != category { + t.Errorf("expected Category %q, got %q", category, event.Category) + } + if event.Domain != domain { + t.Errorf("expected Domain %q, got %q", domain, event.Domain) + } + if event.Attributes == nil { + t.Error("expected Attributes to be initialized, got nil") + } + if len(event.Attributes) != 0 { + t.Errorf("expected empty Attributes map, got len %d", 
len(event.Attributes)) + } +} + +// TestEvent_BuilderChain verifies that builder methods chain correctly and attributes are set +func TestEvent_BuilderChain(t *testing.T) { + event := NewEvent("test_event", "test_category", "test_domain"). + WithString("string_key", "string_value"). + WithInt("int_key", 42). + WithInt64("int64_key", int64(123456789)). + WithFloat64("float64_key", 3.14159). + WithBool("bool_key", true) + + // Verify chaining returns the same event + if event == nil { + t.Fatal("builder chain returned nil") + } + + // Verify attributes + if event.Attributes["string_key"] != "string_value" { + t.Errorf("expected string_key value %q, got %v", "string_value", event.Attributes["string_key"]) + } + if event.Attributes["int_key"] != 42 { + t.Errorf("expected int_key value 42, got %v", event.Attributes["int_key"]) + } + if event.Attributes["int64_key"] != int64(123456789) { + t.Errorf("expected int64_key value 123456789, got %v", event.Attributes["int64_key"]) + } + if event.Attributes["float64_key"] != 3.14159 { + t.Errorf("expected float64_key value 3.14159, got %v", event.Attributes["float64_key"]) + } + if event.Attributes["bool_key"] != true { + t.Errorf("expected bool_key value true, got %v", event.Attributes["bool_key"]) + } + if len(event.Attributes) != 5 { + t.Errorf("expected 5 attributes, got %d", len(event.Attributes)) + } +} + +// TestEvent_WithAttributes verifies that WithAttributes adds multiple attributes at once +func TestEvent_WithAttributes(t *testing.T) { + attrs := map[string]interface{}{ + "key1": "value1", + "key2": 42, + "key3": 3.14, + } + + event := NewEvent("test_event", "test_category", "test_domain"). 
+ WithAttributes(attrs) + + if len(event.Attributes) != 3 { + t.Errorf("expected 3 attributes, got %d", len(event.Attributes)) + } + + if event.Attributes["key1"] != "value1" { + t.Errorf("expected key1 %q, got %v", "value1", event.Attributes["key1"]) + } + if event.Attributes["key2"] != 42 { + t.Errorf("expected key2 42, got %v", event.Attributes["key2"]) + } + if event.Attributes["key3"] != 3.14 { + t.Errorf("expected key3 3.14, got %v", event.Attributes["key3"]) + } +} + +// TestEvent_Track_DoesNotPanic verifies that Track does not panic when called with attributes +func TestEvent_Track_DoesNotPanic(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Errorf("Track panicked: %v", r) + } + }() + + ctx := context.Background() + event := NewEvent("test_event", "test_category", "test_domain"). + WithString("test_key", "test_value"). + WithInt("count", 10). + WithBool("active", true) + + event.Track(ctx) +} + +// TestTrackEvent_DoesNotPanic verifies that TrackEvent convenience function does not panic +func TestTrackEvent_DoesNotPanic(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Errorf("TrackEvent panicked: %v", r) + } + }() + + ctx := context.Background() + attrs := map[string]interface{}{ + "key1": "value1", + "key2": 42, + } + + TrackEvent(ctx, "test_event", "test_category", "test_domain", attrs) +} + +// TestDataOperationEvent verifies DataOperationEvent creates correct event structure +func TestDataOperationEvent(t *testing.T) { + event := DataOperationEvent("insert", true, 5) + + if event.Name != "data_operation" { + t.Errorf("expected name %q, got %q", "data_operation", event.Name) + } + if event.Category != "data_management" { + t.Errorf("expected category %q, got %q", "data_management", event.Category) + } + if event.Domain != "records" { + t.Errorf("expected domain %q, got %q", "records", event.Domain) + } + + if event.Attributes["operation"] != "insert" { + t.Errorf("expected operation %q, got %v", "insert", 
event.Attributes["operation"]) + } + if event.Attributes["success"] != true { + t.Errorf("expected success true, got %v", event.Attributes["success"]) + } + if event.Attributes["records_affected"] != 5 { + t.Errorf("expected records_affected 5, got %v", event.Attributes["records_affected"]) + } +} + +// TestQueryPerformanceEvent verifies QueryPerformanceEvent creates correct event structure +func TestQueryPerformanceEvent(t *testing.T) { + event := QueryPerformanceEvent("user_lookup", int64(150), 10, true, false) + + if event.Name != "query_performance" { + t.Errorf("expected name %q, got %q", "query_performance", event.Name) + } + if event.Category != "performance_monitoring" { + t.Errorf("expected category %q, got %q", "performance_monitoring", event.Category) + } + if event.Domain != "database" { + t.Errorf("expected domain %q, got %q", "database", event.Domain) + } + + if event.Attributes["query_name"] != "user_lookup" { + t.Errorf("expected query_name %q, got %v", "user_lookup", event.Attributes["query_name"]) + } + if event.Attributes["duration_ms"] != int64(150) { + t.Errorf("expected duration_ms 150, got %v", event.Attributes["duration_ms"]) + } + if event.Attributes["result_count"] != 10 { + t.Errorf("expected result_count 10, got %v", event.Attributes["result_count"]) + } + if event.Attributes["success"] != true { + t.Errorf("expected success true, got %v", event.Attributes["success"]) + } + if event.Attributes["cached"] != false { + t.Errorf("expected cached false, got %v", event.Attributes["cached"]) + } +} + +// TestErrorPatternEvent verifies ErrorPatternEvent creates correct event structure +func TestErrorPatternEvent(t *testing.T) { + event := ErrorPatternEvent("timeout", "ERR_TIMEOUT_001", "fetch_user", "api", 3) + + if event.Name != "error_pattern" { + t.Errorf("expected name %q, got %q", "error_pattern", event.Name) + } + if event.Category != "error_analysis" { + t.Errorf("expected category %q, got %q", "error_analysis", event.Category) + } + if 
event.Domain != "reliability" { + t.Errorf("expected domain %q, got %q", "reliability", event.Domain) + } + + if event.Attributes["error_type"] != "timeout" { + t.Errorf("expected error_type %q, got %v", "timeout", event.Attributes["error_type"]) + } + if event.Attributes["frequency"] != 3 { + t.Errorf("expected frequency 3, got %v", event.Attributes["frequency"]) + } +} + +// TestUserActivityEvent verifies UserActivityEvent creates correct event structure +func TestUserActivityEvent(t *testing.T) { + event := UserActivityEvent("login", true) + + if event.Name != "user_activity" { + t.Errorf("expected name %q, got %q", "user_activity", event.Name) + } + if event.Category != "usage_analytics" { + t.Errorf("expected category %q, got %q", "usage_analytics", event.Category) + } + if event.Domain != "user" { + t.Errorf("expected domain %q, got %q", "user", event.Domain) + } + + if event.Attributes["operation"] != "login" { + t.Errorf("expected operation %q, got %v", "login", event.Attributes["operation"]) + } + if event.Attributes["success"] != true { + t.Errorf("expected success true, got %v", event.Attributes["success"]) + } +} + +// TestResourceUsageEvent verifies ResourceUsageEvent creates correct event structure +func TestResourceUsageEvent(t *testing.T) { + event := ResourceUsageEvent("api_call", 256.5, 45.2, 150) + + if event.Name != "resource_usage" { + t.Errorf("expected name %q, got %q", "resource_usage", event.Name) + } + if event.Category != "resource_monitoring" { + t.Errorf("expected category %q, got %q", "resource_monitoring", event.Category) + } + if event.Domain != "infrastructure" { + t.Errorf("expected domain %q, got %q", "infrastructure", event.Domain) + } + + if event.Attributes["memory_mb"] != 256.5 { + t.Errorf("expected memory_mb 256.5, got %v", event.Attributes["memory_mb"]) + } + if event.Attributes["connection_count"] != 150 { + t.Errorf("expected connection_count 150, got %v", event.Attributes["connection_count"]) + } +} diff --git 
a/business/tracking.go b/business/tracking.go new file mode 100644 index 0000000..d59a160 --- /dev/null +++ b/business/tracking.go @@ -0,0 +1,84 @@ +// Package business provides domain-specific tracking functions. +// These are convenience wrappers around the generic event system for backward compatibility. +// For new projects, consider using the generic Event system in events.go instead. +package business + +import ( + "context" +) + +// TrackScopeOperation tracks tenant scope management operations +// DEPRECATED: This is a tvzr-specific function. Use the generic Event system for new projects. +func TrackScopeOperation(ctx context.Context, operation, scopeKey string, success bool, recordsAffected int) { + NewEvent("scope_operation", "tenant_management", "scope"). + WithString("operation", operation). + WithString("scope_key", scopeKey). + WithBool("success", success). + WithInt("records_affected", recordsAffected). + Track(ctx) +} + +// TrackDataModelChanges tracks schema/model definition changes +// DEPRECATED: This is a tvzr-specific function. Use the generic Event system for new projects. +func TrackDataModelChanges(ctx context.Context, modelName, changeType string, fieldCount, recordsAffected int, success bool) { + NewEvent("model_change", "definition_management", "model"). + WithString("model", modelName). + WithString("change_type", changeType). + WithInt("field_count", fieldCount). + WithInt("records_affected", recordsAffected). + WithBool("success", success). + Track(ctx) +} + +// TrackFieldOperation tracks field-level operations (type changes, deletions, etc.) +// DEPRECATED: This is a tvzr-specific function. Use the generic Event system for new projects. +func TrackFieldOperation(ctx context.Context, modelName, fieldName, operation string, success bool, valuesAffected int) { + NewEvent("field_operation", "definition_management", "field"). + WithString("model", modelName). + WithString("name", fieldName). + WithString("operation", operation). 
+ WithBool("success", success). + WithInt("values_affected", valuesAffected). + Track(ctx) +} + +// TrackDataOperationResult tracks high-level data operations (save, find, delete) +// DEPRECATED: This is a tvzr-specific function. Use DataOperationEvent() or the generic Event system for new projects. +func TrackDataOperationResult(ctx context.Context, operation, scopeKey string, modelCount, recordsAffected int, success bool, dryRun bool) { + NewEvent("data_operation", "data_management", "records"). + WithString("operation", operation). + WithString("scope_key", scopeKey). + WithInt("model_count", modelCount). + WithInt("records_affected", recordsAffected). + WithBool("success", success). + WithBool("dry_run", dryRun). + Track(ctx) +} + +// TrackQueryPerformance tracks database query performance metrics +// Use QueryPerformanceEvent() for new projects. +func TrackQueryPerformance(ctx context.Context, queryName string, durationMs int64, resultCount int, success bool, cached bool) { + QueryPerformanceEvent(queryName, durationMs, resultCount, success, cached).Track(ctx) +} + +// TrackResourceUsage tracks system resource utilization +// Use ResourceUsageEvent() for new projects. +func TrackResourceUsage(ctx context.Context, operation string, memoryMB, cpuPercent float64, connectionCount int) { + ResourceUsageEvent(operation, memoryMB, cpuPercent, connectionCount).Track(ctx) +} + +// TrackUserActivity tracks customer/user activity patterns +// DEPRECATED: This is a tvzr-specific function. Use UserActivityEvent() or the generic Event system for new projects. +func TrackUserActivity(ctx context.Context, operation, scopeKey string, inputSizeKB int, responseTimeMs int64, success bool) { + UserActivityEvent(operation, success). + WithString("scope_key", scopeKey). + WithInt("input_size_kb", inputSizeKB). + WithInt64("response_time_ms", responseTimeMs). + Track(ctx) +} + +// TrackErrorPattern tracks error patterns for analysis +// Use ErrorPatternEvent() for new projects. 
+func TrackErrorPattern(ctx context.Context, errorType, errorCode, operation, component string, frequency int) { + ErrorPatternEvent(errorType, errorCode, operation, component, frequency).Track(ctx) +} diff --git a/config/config.go b/config/config.go new file mode 100644 index 0000000..dd6da8b --- /dev/null +++ b/config/config.go @@ -0,0 +1,169 @@ +package config + +import ( + "fmt" + "os" + "strconv" + "strings" +) + +// Config holds the configuration for observability +type Config struct { + Enabled bool `mapstructure:"enabled" json:"enabled"` + APIKey string `mapstructure:"api_key" json:"api_key"` + Dataset string `mapstructure:"dataset" json:"dataset"` + Environment string `mapstructure:"environment" json:"environment"` + ServiceName string `mapstructure:"service_name" json:"service_name"` + Version string `mapstructure:"version" json:"version"` + + // Sampling configuration for controlling event volume + Sampling SamplingConfig `mapstructure:"sampling" json:"sampling"` +} + +// SamplingConfig controls various sampling rates to manage event volume +type SamplingConfig struct { + // TraceSampleRate controls what percentage of traces are sampled (1-100) + // Lower values = fewer events. 
Example: 10 = 10% of traces sampled + TraceSampleRate int `mapstructure:"trace_sample_rate" json:"trace_sample_rate"` + + // ErrorSampleRate controls sampling for error traces (1-100) + // Typically higher than TraceSampleRate to ensure error visibility + ErrorSampleRate int `mapstructure:"error_sample_rate" json:"error_sample_rate"` + + // DatabaseSampleRate controls sampling for database operation spans (1-100) + // Can be lower since DB ops are high-volume + DatabaseSampleRate int `mapstructure:"database_sample_rate" json:"database_sample_rate"` + + // UserEventSampleRate controls sampling for custom user activity events (1-100) + // Business analytics events via TrackUserActivity + UserEventSampleRate int `mapstructure:"user_event_sample_rate" json:"user_event_sample_rate"` + + // HighVolumeEndpoints lists API endpoints that should use reduced sampling + // Example: ["Find", "FetchScope"] for read-heavy operations + HighVolumeEndpoints []string `mapstructure:"high_volume_endpoints" json:"high_volume_endpoints"` + + // HighVolumeSampleRate is the reduced sample rate for high-volume endpoints (1-100) + HighVolumeSampleRate int `mapstructure:"high_volume_sample_rate" json:"high_volume_sample_rate"` +} + +// DefaultConfig returns a default observability configuration +func DefaultConfig() Config { + return Config{ + Enabled: false, + Dataset: "default", + Environment: "development", + ServiceName: "service", + Version: "1.0.0", + Sampling: SamplingConfig{ + TraceSampleRate: 100, // 100% sampling for development + ErrorSampleRate: 100, // Always sample errors + DatabaseSampleRate: 100, // Full DB tracing in dev + UserEventSampleRate: 100, // Full user event tracking + HighVolumeEndpoints: []string{"Find", "Save"}, // Common read operations + HighVolumeSampleRate: 25, // 25% sampling for high-volume endpoints + }, + } +} + +// NewConfigFromEnv creates a configuration from environment variables +func NewConfigFromEnv() (Config, error) { + config := DefaultConfig() + + 
// Basic configuration + if enabled := os.Getenv("OBS_ENABLED"); enabled != "" { + config.Enabled = strings.ToLower(enabled) == "true" + } + if apiKey := os.Getenv("OBS_API_KEY"); apiKey != "" { + config.APIKey = apiKey + } + if dataset := os.Getenv("OBS_DATASET"); dataset != "" { + config.Dataset = dataset + } + if env := os.Getenv("OBS_ENVIRONMENT"); env != "" { + config.Environment = env + } + if serviceName := os.Getenv("OBS_SERVICE_NAME"); serviceName != "" { + config.ServiceName = serviceName + } + if version := os.Getenv("OBS_VERSION"); version != "" { + config.Version = version + } + + // Sampling configuration + if rate := os.Getenv("OBS_TRACE_SAMPLE_RATE"); rate != "" { + if r, err := strconv.Atoi(rate); err == nil && r >= 1 && r <= 100 { + config.Sampling.TraceSampleRate = r + } + } + if rate := os.Getenv("OBS_ERROR_SAMPLE_RATE"); rate != "" { + if r, err := strconv.Atoi(rate); err == nil && r >= 1 && r <= 100 { + config.Sampling.ErrorSampleRate = r + } + } + if rate := os.Getenv("OBS_DATABASE_SAMPLE_RATE"); rate != "" { + if r, err := strconv.Atoi(rate); err == nil && r >= 1 && r <= 100 { + config.Sampling.DatabaseSampleRate = r + } + } + if rate := os.Getenv("OBS_USER_EVENT_SAMPLE_RATE"); rate != "" { + if r, err := strconv.Atoi(rate); err == nil && r >= 1 && r <= 100 { + config.Sampling.UserEventSampleRate = r + } + } + if endpoints := os.Getenv("OBS_HIGH_VOLUME_ENDPOINTS"); endpoints != "" { + parts := strings.Split(endpoints, ",") + filtered := parts[:0] + for _, p := range parts { + if s := strings.TrimSpace(p); s != "" { + filtered = append(filtered, s) + } + } + if len(filtered) > 0 { + config.Sampling.HighVolumeEndpoints = filtered + } + } + if rate := os.Getenv("OBS_HIGH_VOLUME_SAMPLE_RATE"); rate != "" { + if r, err := strconv.Atoi(rate); err == nil && r >= 1 && r <= 100 { + config.Sampling.HighVolumeSampleRate = r + } + } + + return config, nil +} + +// Validate checks if the configuration is valid +func (c Config) Validate() error { + if 
!c.Enabled { + return nil + } + if c.APIKey == "" { + return fmt.Errorf("api_key is required when observability is enabled") + } + if c.Dataset == "" { + return fmt.Errorf("dataset is required when observability is enabled") + } + if c.ServiceName == "" { + return fmt.Errorf("service_name is required when observability is enabled") + } + + // Validate sampling rates + if c.Sampling.TraceSampleRate < 1 || c.Sampling.TraceSampleRate > 100 { + return fmt.Errorf("trace_sample_rate must be between 1 and 100") + } + if c.Sampling.ErrorSampleRate < 1 || c.Sampling.ErrorSampleRate > 100 { + return fmt.Errorf("error_sample_rate must be between 1 and 100") + } + if c.Sampling.DatabaseSampleRate < 1 || c.Sampling.DatabaseSampleRate > 100 { + return fmt.Errorf("database_sample_rate must be between 1 and 100") + } + if c.Sampling.UserEventSampleRate < 1 || c.Sampling.UserEventSampleRate > 100 { + return fmt.Errorf("user_event_sample_rate must be between 1 and 100") + } + if len(c.Sampling.HighVolumeEndpoints) > 0 { + if c.Sampling.HighVolumeSampleRate < 1 || c.Sampling.HighVolumeSampleRate > 100 { + return fmt.Errorf("high_volume_sample_rate must be between 1 and 100") + } + } + + return nil +} diff --git a/config/config_test.go b/config/config_test.go new file mode 100644 index 0000000..33eae01 --- /dev/null +++ b/config/config_test.go @@ -0,0 +1,269 @@ +package config + +import ( + "testing" +) + +// TestDefaultConfig verifies default configuration values +func TestDefaultConfig(t *testing.T) { + cfg := DefaultConfig() + + if cfg.Enabled { + t.Error("expected disabled by default") + } + if cfg.ServiceName == "" { + t.Error("expected service_name to have default value") + } + if cfg.Sampling.TraceSampleRate != 100 { + t.Errorf("expected trace rate 100, got %d", cfg.Sampling.TraceSampleRate) + } +} + +// TestValidate_DisabledConfig verifies that disabled config always validates +func TestValidate_DisabledConfig(t *testing.T) { + cfg := Config{ + Enabled: false, + // All other 
fields empty + Sampling: SamplingConfig{ + TraceSampleRate: 50, + ErrorSampleRate: 50, + DatabaseSampleRate: 50, + UserEventSampleRate: 50, + HighVolumeSampleRate: 50, + }, + } + + err := cfg.Validate() + if err != nil { + t.Errorf("disabled config should always validate, got error: %v", err) + } +} + +// TestValidate_EnabledMissingAPIKey verifies validation fails without API key +func TestValidate_EnabledMissingAPIKey(t *testing.T) { + cfg := Config{ + Enabled: true, + APIKey: "", + Dataset: "test", + ServiceName: "test-service", + Sampling: SamplingConfig{ + TraceSampleRate: 100, + ErrorSampleRate: 100, + DatabaseSampleRate: 100, + UserEventSampleRate: 100, + HighVolumeSampleRate: 100, + }, + } + + err := cfg.Validate() + if err == nil { + t.Error("expected validation to fail without api_key") + } +} + +// TestValidate_EnabledMissingDataset verifies validation fails without dataset +func TestValidate_EnabledMissingDataset(t *testing.T) { + cfg := Config{ + Enabled: true, + APIKey: "test-key", + Dataset: "", + ServiceName: "test-service", + Sampling: SamplingConfig{ + TraceSampleRate: 100, + ErrorSampleRate: 100, + DatabaseSampleRate: 100, + UserEventSampleRate: 100, + HighVolumeSampleRate: 100, + }, + } + + err := cfg.Validate() + if err == nil { + t.Error("expected validation to fail without dataset") + } +} + +// TestValidate_EnabledMissingServiceName verifies validation fails without service name +func TestValidate_EnabledMissingServiceName(t *testing.T) { + cfg := Config{ + Enabled: true, + APIKey: "test-key", + Dataset: "test", + ServiceName: "", + Sampling: SamplingConfig{ + TraceSampleRate: 100, + ErrorSampleRate: 100, + DatabaseSampleRate: 100, + UserEventSampleRate: 100, + HighVolumeSampleRate: 100, + }, + } + + err := cfg.Validate() + if err == nil { + t.Error("expected validation to fail without service_name") + } +} + +// TestValidate_InvalidSamplingRates tests table-driven validation of sampling rates +func TestValidate_InvalidSamplingRates(t 
*testing.T) { + tests := []struct { + name string + rateField string + rateValue int + shouldFail bool + }{ + {"TraceSampleRate 0", "trace", 0, true}, + {"TraceSampleRate 101", "trace", 101, true}, + {"ErrorSampleRate 0", "error", 0, true}, + {"ErrorSampleRate 101", "error", 101, true}, + {"DatabaseSampleRate 0", "database", 0, true}, + {"DatabaseSampleRate 101", "database", 101, true}, + {"UserEventSampleRate 0", "user", 0, true}, + {"UserEventSampleRate 101", "user", 101, true}, + {"HighVolumeSampleRate 0", "highvolume", 0, true}, + {"HighVolumeSampleRate 101", "highvolume", 101, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cfg := Config{ + Enabled: true, + APIKey: "test-key", + Dataset: "test", + ServiceName: "test-service", + Sampling: SamplingConfig{ + TraceSampleRate: 100, + ErrorSampleRate: 100, + DatabaseSampleRate: 100, + UserEventSampleRate: 100, + HighVolumeSampleRate: 100, + }, + } + + // Set the rate field being tested + switch tt.rateField { + case "trace": + cfg.Sampling.TraceSampleRate = tt.rateValue + case "error": + cfg.Sampling.ErrorSampleRate = tt.rateValue + case "database": + cfg.Sampling.DatabaseSampleRate = tt.rateValue + case "user": + cfg.Sampling.UserEventSampleRate = tt.rateValue + case "highvolume": + cfg.Sampling.HighVolumeSampleRate = tt.rateValue + cfg.Sampling.HighVolumeEndpoints = []string{"Find"} + } + + err := cfg.Validate() + if tt.shouldFail && err == nil { + t.Errorf("%s: expected validation to fail", tt.name) + } + if !tt.shouldFail && err != nil { + t.Errorf("%s: expected validation to pass, got error: %v", tt.name, err) + } + }) + } +} + +// TestNewConfigFromEnv_Defaults verifies default values when no env vars are set +func TestNewConfigFromEnv_Defaults(t *testing.T) { + // Ensure no relevant env vars are set + t.Setenv("OBS_ENABLED", "") + t.Setenv("OBS_API_KEY", "") + t.Setenv("OBS_DATASET", "") + t.Setenv("OBS_ENVIRONMENT", "") + t.Setenv("OBS_SERVICE_NAME", "") + 
t.Setenv("OBS_VERSION", "") + t.Setenv("OBS_TRACE_SAMPLE_RATE", "") + t.Setenv("OBS_ERROR_SAMPLE_RATE", "") + t.Setenv("OBS_DATABASE_SAMPLE_RATE", "") + t.Setenv("OBS_USER_EVENT_SAMPLE_RATE", "") + t.Setenv("OBS_HIGH_VOLUME_ENDPOINTS", "") + t.Setenv("OBS_HIGH_VOLUME_SAMPLE_RATE", "") + + cfg, err := NewConfigFromEnv() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if cfg.Enabled { + t.Error("expected disabled by default") + } + if cfg.ServiceName != "service" { + t.Errorf("expected default service_name, got %q", cfg.ServiceName) + } + if cfg.Sampling.TraceSampleRate != 100 { + t.Errorf("expected default trace rate 100, got %d", cfg.Sampling.TraceSampleRate) + } +} + +// TestNewConfigFromEnv_HighVolumeEndpoints_Empty verifies empty string doesn't produce [""] +func TestNewConfigFromEnv_HighVolumeEndpoints_Empty(t *testing.T) { + t.Setenv("OBS_HIGH_VOLUME_ENDPOINTS", "") + + cfg, err := NewConfigFromEnv() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Should use defaults, not empty string + expected := []string{"Find", "Save"} + if len(cfg.Sampling.HighVolumeEndpoints) != len(expected) { + t.Errorf("expected %d endpoints, got %d", len(expected), len(cfg.Sampling.HighVolumeEndpoints)) + } +} + +// TestNewConfigFromEnv_HighVolumeEndpoints_Set verifies parsing of comma-separated endpoints +func TestNewConfigFromEnv_HighVolumeEndpoints_Set(t *testing.T) { + t.Setenv("OBS_HIGH_VOLUME_ENDPOINTS", "Find,List,GetAll") + + cfg, err := NewConfigFromEnv() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if len(cfg.Sampling.HighVolumeEndpoints) != 3 { + t.Errorf("expected 3 endpoints, got %d", len(cfg.Sampling.HighVolumeEndpoints)) + } + if cfg.Sampling.HighVolumeEndpoints[0] != "Find" { + t.Errorf("expected first endpoint 'Find', got %q", cfg.Sampling.HighVolumeEndpoints[0]) + } + if cfg.Sampling.HighVolumeEndpoints[1] != "List" { + t.Errorf("expected second endpoint 'List', got %q", cfg.Sampling.HighVolumeEndpoints[1]) 
+ } + if cfg.Sampling.HighVolumeEndpoints[2] != "GetAll" { + t.Errorf("expected third endpoint 'GetAll', got %q", cfg.Sampling.HighVolumeEndpoints[2]) + } +} + +// TestNewConfigFromEnv_InvalidRate verifies non-numeric rate keeps default +func TestNewConfigFromEnv_InvalidRate(t *testing.T) { + t.Setenv("OBS_TRACE_SAMPLE_RATE", "not-a-number") + + cfg, err := NewConfigFromEnv() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Should keep default value of 100 + if cfg.Sampling.TraceSampleRate != 100 { + t.Errorf("expected default trace rate 100 on invalid input, got %d", cfg.Sampling.TraceSampleRate) + } +} + +// TestNewConfigFromEnv_OutOfRangeRate verifies out-of-range rate keeps default +func TestNewConfigFromEnv_OutOfRangeRate(t *testing.T) { + t.Setenv("OBS_ERROR_SAMPLE_RATE", "200") + + cfg, err := NewConfigFromEnv() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // Should keep default value of 100 + if cfg.Sampling.ErrorSampleRate != 100 { + t.Errorf("expected default error rate 100 on out-of-range input, got %d", cfg.Sampling.ErrorSampleRate) + } +} diff --git a/examples/basic/main.go b/examples/basic/main.go new file mode 100644 index 0000000..9e40e85 --- /dev/null +++ b/examples/basic/main.go @@ -0,0 +1,78 @@ +package main + +import ( + "context" + "log" + "time" + + "github.com/bold-minds/obs" +) + +func main() { + ctx := context.Background() + + // Example 1: Initialize from environment variables + // Set OBS_ENABLED=true, OBS_API_KEY=your_key, etc. 
+ if err := obs.InitFromEnv(ctx); err != nil { + log.Printf("Failed to initialize observability: %v", err) + // Continue without observability in this example + } + defer obs.Shutdown(ctx) + + // Example 2: Track a business operation + simulateUserSignup(ctx) + + // Example 3: Track database operations + simulateDataOperation(ctx) + + // Example 4: Track API operations with error handling + simulateAPIOperation(ctx) +} + +func simulateUserSignup(ctx context.Context) { + // High-level tracked operation + ctx, op := obs.TrackOperation(ctx, "api", "user_signup") + defer op.Success(1) + + // Simulate some work + time.Sleep(50 * time.Millisecond) + + // Track user activity + obs.TrackUserActivity(ctx, "signup", "tenant_123", 2048, 50, true) +} + +func simulateDataOperation(ctx context.Context) { + // Track data operation with business context + ctx, span := obs.TrackDataOperation(ctx, "save", "tenant_123", 5, false) + defer func() { + obs.FinishSpanWithResult(span, 100*time.Millisecond, 5, nil) + }() + + // Simulate database work + time.Sleep(100 * time.Millisecond) + + // Track the result + obs.TrackDataOperationResult(ctx, "save", "tenant_123", 5, 100, true, false) +} + +func simulateAPIOperation(ctx context.Context) { + // Track API operation with error handling + ctx, op := obs.TrackOperationWithError(ctx, "critical", "payment_processing") + + // Simulate some processing + time.Sleep(25 * time.Millisecond) + + // Simulate success (in real code, check for actual errors) + if err := processPayment(); err != nil { + op.Error(err, "payment_failed") + return + } + + op.Success(1) +} + +func processPayment() error { + // Simulate payment processing + time.Sleep(10 * time.Millisecond) + return nil // Success in this example +} diff --git a/examples/generic/main.go b/examples/generic/main.go new file mode 100644 index 0000000..bd3f65b --- /dev/null +++ b/examples/generic/main.go @@ -0,0 +1,135 @@ +// This example shows how to use the new generic event system +// for maximum 
flexibility and reusability across different domains + +package main + +import ( + "context" + "log" + + "github.com/bold-minds/obs" +) + +func main() { + ctx := context.Background() + + // Initialize observability + if err := obs.InitFromEnv(ctx); err != nil { + log.Printf("Failed to initialize observability: %v", err) + } + defer obs.Shutdown(ctx) + + // Example 1: Generic event tracking for any business domain + trackECommerceEvents(ctx) + + // Example 2: Using pre-built event builders + trackCommonEvents(ctx) + + // Example 3: Custom domain-specific events + trackCustomDomainEvents(ctx) +} + +func trackECommerceEvents(ctx context.Context) { + // Track an e-commerce order event + obs.NewEvent("order_placed", "sales", "ecommerce"). + WithString("customer_id", "cust_12345"). + WithString("order_id", "ord_67890"). + WithFloat64("order_value", 149.99). + WithInt("item_count", 3). + WithBool("success", true). + WithString("payment_method", "credit_card"). + Track(ctx) + + // Track inventory update + obs.NewEvent("inventory_updated", "operations", "warehouse"). + WithString("product_id", "prod_abc123"). + WithString("operation", "restock"). + WithInt("quantity_added", 50). + WithInt("new_total", 125). + WithBool("low_stock_alert", false). + Track(ctx) + + // Track user behavior + obs.NewEvent("page_view", "analytics", "website"). + WithString("user_id", "user_789"). + WithString("page", "/product/abc123"). + WithInt64("load_time_ms", 245). + WithString("referrer", "google"). + WithBool("mobile", true). + Track(ctx) +} + +func trackCommonEvents(ctx context.Context) { + // Use pre-built event builders for common scenarios + + // Data operation + obs.DataOperationEvent("create_user", true, 1). + WithString("user_type", "premium"). + WithString("source", "web_signup"). + Track(ctx) + + // Query performance + obs.QueryPerformanceEvent("find_products", 45, 25, true, false). + WithString("filter", "category:electronics"). 
+ Track(ctx) + + // User activity + obs.UserActivityEvent("api_call", true). + WithString("endpoint", "/api/v1/products"). + WithString("method", "GET"). + WithInt("response_size", 2048). + Track(ctx) + + // Error pattern + obs.ErrorPatternEvent("timeout", "E001", "payment_processing", "stripe_api", 3). + WithString("severity", "high"). + Track(ctx) + + // Resource usage + obs.ResourceUsageEvent("peak_traffic", 512.5, 75.2, 150). + WithString("instance", "web-01"). + Track(ctx) +} + +func trackCustomDomainEvents(ctx context.Context) { + // Example: Gaming domain events + obs.NewEvent("player_level_up", "gameplay", "progression"). + WithString("player_id", "player_456"). + WithInt("old_level", 15). + WithInt("new_level", 16). + WithInt("xp_gained", 1250). + WithString("achievement", "dragon_slayer"). + WithInt64("session_duration_ms", 3600000). // 1 hour + Track(ctx) + + // Example: IoT sensor data + obs.NewEvent("sensor_reading", "monitoring", "iot"). + WithString("sensor_id", "temp_001"). + WithString("location", "warehouse_a"). + WithFloat64("temperature", 22.5). + WithFloat64("humidity", 45.2). + WithBool("alert_triggered", false). + WithString("unit", "celsius"). + Track(ctx) + + // Example: Financial transaction + obs.NewEvent("transaction_processed", "finance", "payments"). + WithString("transaction_id", "txn_789123"). + WithString("account_from", "acc_111"). + WithString("account_to", "acc_222"). + WithFloat64("amount", 1500.00). + WithString("currency", "USD"). + WithBool("fraud_check_passed", true). + WithInt64("processing_time_ms", 125). + Track(ctx) + + // Example: Content management + obs.NewEvent("article_published", "content", "cms"). + WithString("article_id", "art_456"). + WithString("author_id", "auth_789"). + WithString("category", "technology"). + WithInt("word_count", 1200). + WithBool("featured", true). + WithString("status", "published"). 
+ Track(ctx) +} diff --git a/examples/migration/main.go b/examples/migration/main.go new file mode 100644 index 0000000..ec29514 --- /dev/null +++ b/examples/migration/main.go @@ -0,0 +1,50 @@ +// This example shows how to migrate from the original observe package +// to the new obs library with ZERO code changes required! + +package main + +import ( + "context" + "log" + + // Before migration: + // "your_project/observe" + + // After migration - just change the import! + "github.com/bold-minds/obs" +) + +func main() { + ctx := context.Background() + + // All the same function calls work exactly as before! + config := obs.DefaultConfig() + config.Enabled = true + config.APIKey = "your_honeycomb_key" + config.Dataset = "your_dataset" + config.ServiceName = "your_service" + + // Initialize exactly as before + provider, cleanup, err := obs.InitHoneycomb(ctx, config) + if err != nil { + log.Fatal(err) + } + defer cleanup() + + // All your existing business tracking functions work unchanged + obs.TrackScopeOperation(ctx, "create", "tenant_123", true, 5) + obs.TrackDataModelChanges(ctx, "User", "add_field", 1, 100, true) + obs.TrackQueryPerformance(ctx, "find_users", 45, 25, true, false) + + // All your existing tracing functions work unchanged + ctx, span := obs.StartSpan(ctx, "business", "complex_operation") + obs.AddBusinessContext(span, "tenant_123", 5, "migration_test") + obs.AddMemoryMetrics(span) + span.End() + + // TrackedOperation works exactly the same + ctx, op := obs.TrackOperation(ctx, "api", "test_operation") + op.Success(1) + + log.Printf("Migration successful! 
Provider: %T", provider) +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..3bd8c12 --- /dev/null +++ b/go.mod @@ -0,0 +1,30 @@ +module github.com/bold-minds/obs + +go 1.25.0 + +require ( + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 +) + +require ( + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + go.opentelemetry.io/otel/metric v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect + golang.org/x/net v0.52.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..18865a2 --- /dev/null +++ b/go.sum @@ -0,0 +1,61 @@ +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace 
v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod 
h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/exception.go b/internal/exception.go new file mode 100644 index 0000000..e5dd380 --- /dev/null +++ b/internal/exception.go @@ -0,0 +1,9 @@ +package internal + +// Exception represents a structured exception with code and message. +// Types implementing this interface get richer error attributes in spans. +type Exception interface { + Code() int + Message() string + Error() string +} diff --git a/obs.go b/obs.go new file mode 100644 index 0000000..db267b8 --- /dev/null +++ b/obs.go @@ -0,0 +1,131 @@ +package obs + +import ( + "context" + "fmt" + "sync" + + "github.com/bold-minds/obs/config" + "github.com/bold-minds/obs/providers" + "github.com/bold-minds/obs/sampling" +) + +// Client represents the main observability client. +type Client struct { + mu sync.RWMutex + config config.Config + provider providers.Provider +} + +// New creates a new observability client with the given configuration. 
+func New(cfg config.Config) (*Client, error) {
+	// Reject bad configuration before touching any global state.
+	if validationErr := cfg.Validate(); validationErr != nil {
+		return nil, fmt.Errorf("invalid config: %w", validationErr)
+	}
+
+	// Install a sampling manager built from this config, replacing any existing one.
+	sampling.SetManager(cfg.Sampling)
+
+	// Honeycomb is currently the only provider.
+	client := &Client{
+		config:   cfg,
+		provider: providers.NewHoneycombProvider(cfg),
+	}
+	return client, nil
+}
+
+// NewFromEnv builds a client from the configuration returned by config.NewConfigFromEnv.
+func NewFromEnv() (*Client, error) {
+	envCfg, loadErr := config.NewConfigFromEnv()
+	if loadErr == nil {
+		return New(envCfg)
+	}
+
+	return nil, fmt.Errorf("failed to load config from environment: %w", loadErr)
+}
+
+// Init initializes the client's provider with ctx and returns the provider's error.
+func (c *Client) Init(ctx context.Context) error {
+	return c.provider.Init(ctx)
+}
+
+// Shutdown shuts down the client's provider with ctx and returns the provider's error.
+func (c *Client) Shutdown(ctx context.Context) error {
+	return c.provider.Shutdown(ctx)
+}
+
+// Config returns the current configuration by value, read under the client's read lock.
+func (c *Client) Config() config.Config {
+	c.mu.RLock()
+	snapshot := c.config
+	c.mu.RUnlock()
+	return snapshot
+}
+
+// UpdateSamplingConfig stores samplingConfig on the client and pushes it to the global sampling manager, if any.
+func (c *Client) UpdateSamplingConfig(samplingConfig config.SamplingConfig) {
+	c.mu.Lock()
+	c.config.Sampling = samplingConfig
+	c.mu.Unlock()
+	// The global manager is updated after c.mu is released.
+	if manager := sampling.GetManager(); manager != nil {
+		manager.UpdateConfig(samplingConfig)
+	}
+}
+
+// Global client used by the package-level convenience functions; globalMu guards globalClient.
+var (
+	globalMu     sync.RWMutex
+	globalClient *Client
+)
+
+// Init builds a client from cfg, initializes it, and installs it as the global client.
+func Init(ctx context.Context, cfg config.Config) error {
+	c, err := New(cfg)
+	if err == nil {
+		err = c.Init(ctx)
+	}
+	if err != nil {
+		return err // the previous global client, if any, stays in place
+	}
+	globalMu.Lock()
+	defer globalMu.Unlock()
+	globalClient = c
+	return nil
+}
+
+// InitFromEnv initializes the global observability client from environment variables.
+func InitFromEnv(ctx context.Context) error {
+	client, err := NewFromEnv()
+	if err != nil {
+		return err
+	}
+
+	if err := client.Init(ctx); err != nil {
+		return err
+	}
+
+	globalMu.Lock()
+	globalClient = client
+	globalMu.Unlock()
+	return nil
+}
+
+// Shutdown shuts down the global client; it returns nil when no global client is set.
+func Shutdown(ctx context.Context) error {
+	globalMu.RLock()
+	c := globalClient
+	globalMu.RUnlock()
+	if c == nil {
+		return nil
+	}
+	return c.Shutdown(ctx)
+}
+
+// GetClient returns the global client instance, which is nil until Init or InitFromEnv succeeds.
+func GetClient() *Client {
+	globalMu.RLock()
+	defer globalMu.RUnlock()
+	return globalClient
+}
diff --git a/obs_test.go b/obs_test.go
new file mode 100644
index 0000000..16c49f8
--- /dev/null
+++ b/obs_test.go
@@ -0,0 +1,129 @@
+package obs
+
+import (
+	"context"
+	"testing"
+
+	"github.com/bold-minds/obs/config"
+)
+
+func TestGetTracer(t *testing.T) {
+	tracer := GetTracer("mycomp")
+	if tracer == nil {
+		t.Fatal("GetTracer returned nil")
+	}
+}
+
+func TestNew_DisabledConfig(t *testing.T) {
+	cfg := config.DefaultConfig()
+	cfg.Enabled = false
+
+	client, err := New(cfg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if client == nil {
+		t.Fatal("client should not be nil")
+	}
+}
+
+func TestNew_InvalidConfig(t *testing.T) {
+	cfg := config.Config{
+		Enabled: true,
+		// All other fields are left at their zero values, so New is expected to reject this config.
+	}
+	_, err := New(cfg)
+	if err == nil {
+		t.Error("should fail with invalid enabled config")
+	}
+}
+
+func TestClient_InitAndShutdown(t *testing.T) {
+	ctx := context.Background()
+	cfg := config.DefaultConfig()
+	cfg.Enabled = false
+
+	client, err := New(cfg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if err := client.Init(ctx); err != nil {
+		t.Fatalf("Init failed: %v", err)
+	}
+	if err := client.Shutdown(ctx); err != nil {
+		t.Fatalf("Shutdown failed: %v", err)
+	}
+}
+
+func TestClient_Config(t *testing.T) {
+	cfg := config.DefaultConfig()
+	client, err := New(cfg)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if client.Config().ServiceName != cfg.ServiceName {
+		t.Error("Config() should return the config passed to New()")
+	}
+}
+
+func TestGlobalInit_Disabled(t *testing.T) {
+	ctx := context.Background()
+	cfg := config.DefaultConfig()
+	cfg.Enabled = false
+
+	if err := Init(ctx, cfg); err != nil {
+		t.Fatalf("Init failed: %v", err)
+	}
+	if GetClient() == nil {
+		t.Error("GetClient should return non-nil after Init")
+	}
+	if err := Shutdown(ctx); err != nil {
+		t.Fatalf("Shutdown failed: %v", err)
+	}
+}
+
+func TestGlobalInitFromEnv(t *testing.T) {
+	ctx := context.Background()
+	t.Setenv("OBS_ENABLED", "false")
+
+	if err := InitFromEnv(ctx); err != nil {
+		t.Fatalf("InitFromEnv failed: %v", err)
+	}
+	if err := Shutdown(ctx); err != nil {
+		t.Fatalf("Shutdown failed: %v", err)
+	}
+}
+
+func TestShutdown_NilClient(t *testing.T) {
+	globalClient = nil
+	ctx := context.Background()
+	if err := Shutdown(ctx); err != nil {
+		t.Errorf("Shutdown on nil client should not error: %v", err)
+	}
+}
+
+func TestGlobalTracing_DoesNotPanic(t *testing.T) {
+	ctx := context.Background()
+	cfg := config.DefaultConfig()
+	cfg.Enabled = false
+	_ = Init(ctx, cfg)
+	defer Shutdown(ctx)
+
+	// Smoke test: every call below must complete without panicking; results are not inspected.
+	TrackScopeOperation(ctx, "test", "t1", true, 5)
+	TrackDataModelChanges(ctx, "Model", "add", 1, 10, true)
+	TrackQueryPerformance(ctx, "q1", 50, 10, true, false)
+	TrackUserActivity(ctx, "login", "t1", 100, 200, true)
+	TrackResourceUsage(ctx, "handler", 256.0, 45.0, 10)
+	TrackErrorPattern(ctx, "timeout", "E001", "save", "db", 3)
+
+	ctx2, span := StartSpan(ctx, "test", "op")
+	AddBusinessContext(span, "t1", 1, "test")
+	AddMemoryMetrics(span)
+	span.End()
+
+	_, op := TrackOperation(ctx2, "api", "test_op")
+	op.Success(1)
+
+	NewEvent("test", "cat", "dom").WithString("k", "v").Track(ctx)
+}
diff --git a/observe.go b/observe.go
new file mode 100644
index 0000000..3446a02
--- /dev/null
+++ b/observe.go
@@ -0,0 +1,225 @@
+// Package obs provides a compatibility layer that maintains the exact API
+// from the original observe package, ensuring minimal migration friction.
+package obs
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	oteltrace "go.opentelemetry.io/otel/trace"
+
+	"github.com/bold-minds/obs/business"
+	"github.com/bold-minds/obs/config"
+	"github.com/bold-minds/obs/providers"
+	"github.com/bold-minds/obs/sampling"
+	"github.com/bold-minds/obs/tracing"
+)
+
+// COMPATIBILITY LAYER - These functions maintain the exact API from the original observe package
+
+// InitHoneycomb replaces the global sampling manager, initializes a Honeycomb provider, and
+// returns it with a cleanup func that shuts it down (10s timeout; shutdown errors go to stdout).
+func InitHoneycomb(ctx context.Context, cfg config.Config) (*providers.HoneycombProvider, func(), error) {
+	// Replace the global sampling manager with one built from cfg.Sampling.
+	sampling.SetManager(cfg.Sampling)
+
+	provider := providers.NewHoneycombProvider(cfg)
+	if err := provider.Init(ctx); err != nil {
+		return nil, nil, err
+	}
+
+	// The cleanup func uses its own background context, so it still runs after ctx is cancelled.
+	cleanup := func() {
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+		defer cancel()
+		if err := provider.Shutdown(shutdownCtx); err != nil {
+			// Report the error on stdout but don't panic during shutdown.
+			fmt.Printf("Error shutting down tracer provider: %v\n", err)
+		}
+	}
+
+	return provider, cleanup, nil
+}
+
+// GetTracer returns the global OpenTelemetry tracer named "obs.<name>".
+// Maintains compatibility with original API.
+func GetTracer(name string) oteltrace.Tracer {
+	return otel.Tracer(fmt.Sprintf("obs.%s", name))
+}
+
+// InitSampling calls sampling.Init, which sets the global sampling manager only if none is set yet.
+// Maintains compatibility with original API.
+func InitSampling(cfg config.SamplingConfig) {
+	sampling.Init(cfg)
+}
+
+// GetSamplingManager returns the global sampling manager (nil if none has been set).
+// Maintains compatibility with original API.
+func GetSamplingManager() *sampling.Manager {
+	return sampling.GetManager()
+}
+
+// TRACING FUNCTIONS - Direct compatibility with original API
+
+// StartSpan creates a new span with the given name and component; it forwards to tracing.StartSpan.
+func StartSpan(ctx context.Context, component, operation string) (context.Context, oteltrace.Span) {
+	return tracing.StartSpan(ctx, component, operation)
+}
+
+// StartSpanWithSampling creates a span with sampling based on operation type; it forwards to tracing.StartSpanWithSampling.
+func StartSpanWithSampling(ctx context.Context, component, operation string, isError bool) (context.Context, oteltrace.Span) {
+	return tracing.StartSpanWithSampling(ctx, component, operation, isError)
+}
+
+// TraceError records an error on the span with structured attributes; it forwards to tracing.TraceError.
+func TraceError(span oteltrace.Span, err error, operation string) {
+	tracing.TraceError(span, err, operation)
+}
+
+// AddBusinessContext adds common business context attributes to a span; it forwards to tracing.AddBusinessContext.
+func AddBusinessContext(span oteltrace.Span, scopeKey string, modelCount int, operation string) {
+	tracing.AddBusinessContext(span, scopeKey, modelCount, operation)
+}
+
+// AddDatabaseContext adds database operation context to a span.
+// WARNING: statement may contain sensitive data — sanitise before passing.
+func AddDatabaseContext(span oteltrace.Span, dbSystem, queryName, statement string, paramCount int) {
+	tracing.AddDatabaseContext(span, dbSystem, queryName, statement, paramCount)
+}
+
+// AddPerformanceMetrics adds performance-related attributes to a span; it forwards to tracing.AddPerformanceMetrics.
+func AddPerformanceMetrics(span oteltrace.Span, duration time.Duration, resultCount int) {
+	tracing.AddPerformanceMetrics(span, duration, resultCount)
+}
+
+// AddMemoryMetrics adds current memory usage metrics to a span; it forwards to tracing.AddMemoryMetrics.
+func AddMemoryMetrics(span oteltrace.Span) {
+	tracing.AddMemoryMetrics(span)
+}
+
+// TrackAPIOperation creates a span for API-level operations; it forwards to tracing.TrackAPIOperation.
+func TrackAPIOperation(ctx context.Context, operation string, inputSize int) (context.Context, oteltrace.Span) {
+	return tracing.TrackAPIOperation(ctx, operation, inputSize)
+}
+
+// TrackDatabaseOperation creates a span for database-level operations; it forwards to tracing.TrackDatabaseOperation.
+func TrackDatabaseOperation(ctx context.Context, operation, queryName string, querySize int) (context.Context, oteltrace.Span) {
+	return tracing.TrackDatabaseOperation(ctx, operation, queryName, querySize)
+}
+
+// TrackDataOperation creates a span for data service operations; it forwards to tracing.TrackDataOperation.
+func TrackDataOperation(ctx context.Context, operation, scopeKey string, modelCount int, dryRun bool) (context.Context, oteltrace.Span) {
+	return tracing.TrackDataOperation(ctx, operation, scopeKey, modelCount, dryRun)
+}
+
+// TrackStoreOperation creates a span for store/database operations; it forwards to tracing.TrackStoreOperation.
+func TrackStoreOperation(ctx context.Context, operation string) (context.Context, oteltrace.Span) {
+	return tracing.TrackStoreOperation(ctx, operation)
+}
+
+// TrackQueryExecution creates a span for individual query execution; it forwards to tracing.TrackQueryExecution.
+func TrackQueryExecution(ctx context.Context, queryName, cypher string, params map[string]any) (context.Context, oteltrace.Span) {
+	return tracing.TrackQueryExecution(ctx, queryName, cypher, params)
+}
+
+// FinishSpanWithResult finishes a span with timing and result metrics; it forwards to tracing.FinishSpanWithResult.
+func FinishSpanWithResult(span oteltrace.Span, duration time.Duration, resultCount int, err error) {
+	tracing.FinishSpanWithResult(span, duration, resultCount, err)
+}
+
+// TrackErrorAndFinish is a concise helper for error cases with optional attributes; it forwards to tracing.TrackErrorAndFinish.
+func TrackErrorAndFinish(span oteltrace.Span, start time.Time, err error, errorType string, attrs ...attribute.KeyValue) {
+	tracing.TrackErrorAndFinish(span, start, err, errorType, attrs...)
+}
+
+// TrackSuccessAndFinish is a concise helper for success cases with metrics; it forwards to tracing.TrackSuccessAndFinish.
+func TrackSuccessAndFinish(span oteltrace.Span, start time.Time, resultCount int, attrs ...attribute.KeyValue) {
+	tracing.TrackSuccessAndFinish(span, start, resultCount, attrs...)
+}
+
+// TrackedOperation is an alias for tracing.TrackedOperation, the handle returned by TrackOperation.
+type TrackedOperation = tracing.TrackedOperation
+
+// TrackOperation starts a tracked operation; it forwards to tracing.TrackOperation.
+func TrackOperation(ctx context.Context, operationType, name string, attrs ...attribute.KeyValue) (context.Context, *TrackedOperation) {
+	return tracing.TrackOperation(ctx, operationType, name, attrs...)
+}
+
+// TrackOperationWithError starts a tracked operation for error handling; it forwards to tracing.TrackOperationWithError.
+func TrackOperationWithError(ctx context.Context, operationType, name string, attrs ...attribute.KeyValue) (context.Context, *TrackedOperation) {
+	return tracing.TrackOperationWithError(ctx, operationType, name, attrs...)
+}
+
+// GENERIC EVENT SYSTEM - New flexible business tracking
+
+// Event is an alias for business.Event, a generic business event with flexible attributes.
+type Event = business.Event
+
+// NewEvent creates a new business event (same function value as business.NewEvent).
+var NewEvent = business.NewEvent
+
+// TrackEvent creates and tracks an event in one call (same function value as business.TrackEvent).
+var TrackEvent = business.TrackEvent
+
+// Common event builders for typical business scenarios, re-exported from the business package.
+var (
+	DataOperationEvent    = business.DataOperationEvent
+	UserActivityEvent     = business.UserActivityEvent
+	QueryPerformanceEvent = business.QueryPerformanceEvent
+	ErrorPatternEvent     = business.ErrorPatternEvent
+	ResourceUsageEvent    = business.ResourceUsageEvent
+)
+
+// BUSINESS TRACKING FUNCTIONS - Direct compatibility with original API
+// NOTE: These are tvzr-specific and deprecated for new projects. Use the Event system above.
+
+// TrackScopeOperation tracks tenant scope management operations; it forwards to business.TrackScopeOperation.
+func TrackScopeOperation(ctx context.Context, operation, scopeKey string, success bool, recordsAffected int) {
+	business.TrackScopeOperation(ctx, operation, scopeKey, success, recordsAffected)
+}
+
+// TrackDataModelChanges tracks schema/model definition changes; it forwards to business.TrackDataModelChanges.
+func TrackDataModelChanges(ctx context.Context, modelName, changeType string, fieldCount, recordsAffected int, success bool) {
+	business.TrackDataModelChanges(ctx, modelName, changeType, fieldCount, recordsAffected, success)
+}
+
+// TrackFieldOperation tracks field-level operations (type changes, deletions, etc.)
+func TrackFieldOperation(ctx context.Context, modelName, fieldName, operation string, success bool, valuesAffected int) {
+	business.TrackFieldOperation(ctx, modelName, fieldName, operation, success, valuesAffected)
+}
+
+// TrackDataOperationResult tracks high-level data operations (save, find, delete); it forwards to business.TrackDataOperationResult.
+func TrackDataOperationResult(ctx context.Context, operation, scopeKey string, modelCount, recordsAffected int, success bool, dryRun bool) {
+	business.TrackDataOperationResult(ctx, operation, scopeKey, modelCount, recordsAffected, success, dryRun)
+}
+
+// TrackQueryPerformance tracks database query performance metrics; it forwards to business.TrackQueryPerformance.
+func TrackQueryPerformance(ctx context.Context, queryName string, durationMs int64, resultCount int, success bool, cached bool) {
+	business.TrackQueryPerformance(ctx, queryName, durationMs, resultCount, success, cached)
+}
+
+// TrackResourceUsage tracks system resource utilization; it forwards to business.TrackResourceUsage.
+func TrackResourceUsage(ctx context.Context, operation string, memoryMB, cpuPercent float64, connectionCount int) {
+	business.TrackResourceUsage(ctx, operation, memoryMB, cpuPercent, connectionCount)
+}
+
+// TrackUserActivity tracks customer/user activity patterns; it forwards to business.TrackUserActivity.
+func TrackUserActivity(ctx context.Context, operation, scopeKey string, inputSizeKB int, responseTimeMs int64, success bool) {
+	business.TrackUserActivity(ctx, operation, scopeKey, inputSizeKB, responseTimeMs, success)
+}
+
+// TrackErrorPattern tracks error patterns for analysis; it forwards to business.TrackErrorPattern.
+func TrackErrorPattern(ctx context.Context, errorType, errorCode, operation, component string, frequency int) {
+	business.TrackErrorPattern(ctx, errorType, errorCode, operation, component, frequency)
+}
+
+// Type aliases so callers can use obs.Config, obs.SamplingConfig and obs.SamplingManager directly.
+type Config = config.Config
+type SamplingConfig = config.SamplingConfig
+type SamplingManager = sampling.Manager
+
+// DefaultConfig is re-exported from the config package for compatibility.
+var DefaultConfig = config.DefaultConfig
diff --git a/providers/honeycomb.go b/providers/honeycomb.go
new file mode 100644
index
0000000..3692c4e
--- /dev/null
+++ b/providers/honeycomb.go
@@ -0,0 +1,114 @@
+package providers
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/sdk/resource"
+	"go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
+
+	"github.com/bold-minds/obs/config"
+)
+
+// HoneycombProvider implements the Provider interface for Honeycomb; tp is nil until Init succeeds.
+type HoneycombProvider struct {
+	config config.Config
+	tp     *trace.TracerProvider
+}
+
+// Provider defines the lifecycle interface for observability providers.
+type Provider interface {
+	Init(ctx context.Context) error
+	Shutdown(ctx context.Context) error
+	GetTracerProvider() *trace.TracerProvider
+}
+
+// NewHoneycombProvider creates a Honeycomb provider holding config; call Init before use.
+func NewHoneycombProvider(config config.Config) *HoneycombProvider {
+	return &HoneycombProvider{
+		config: config,
+	}
+}
+
+// Init validates the configuration, builds an OTLP/HTTP exporter that sends traces to
+// Honeycomb, and registers the resulting tracer provider as the OpenTelemetry global.
+// When the config is disabled it only creates a local provider with no exporter.
+func (h *HoneycombProvider) Init(ctx context.Context) error {
+	if !h.config.Enabled {
+		// Disabled: SDK provider with no span processor, so nothing is exported.
+		h.tp = trace.NewTracerProvider()
+		return nil
+	}
+
+	if err := h.config.Validate(); err != nil {
+		return fmt.Errorf("invalid observability config: %w", err)
+	}
+
+	// Create OTLP HTTP exporter for Honeycomb. WithEndpoint takes a bare host[:port];
+	// the scheme must not be included (HTTPS is used unless WithInsecure is set).
+	exporter, err := otlptracehttp.New(ctx,
+		otlptracehttp.WithEndpoint("api.honeycomb.io"),
+		otlptracehttp.WithHeaders(map[string]string{
+			"x-honeycomb-team":    h.config.APIKey,
+			"x-honeycomb-dataset": h.config.Dataset,
+		}),
+		otlptracehttp.WithTimeout(10*time.Second),
+	)
+	if err != nil {
+		return fmt.Errorf("failed to create OTLP exporter: %w", err)
+	}
+
+	// Create resource with service information.
+	// If this fails, shut down the exporter we already created.
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceNameKey.String(h.config.ServiceName),
+			semconv.ServiceVersionKey.String(h.config.Version),
+			semconv.DeploymentEnvironmentKey.String(h.config.Environment),
+		),
+	)
+	if err != nil {
+		_ = exporter.Shutdown(context.Background())
+		return fmt.Errorf("failed to create resource: %w", err)
+	}
+
+	// Create tracer provider with batch span processor.
+	// TraceSampleRate is a percentage; convert it to the 0..1 ratio the sampler expects.
+	sampleRatio := float64(h.config.Sampling.TraceSampleRate) / 100.0
+	h.tp = trace.NewTracerProvider(
+		trace.WithBatcher(exporter,
+			trace.WithBatchTimeout(5*time.Second),
+			trace.WithMaxExportBatchSize(512),
+		),
+		trace.WithResource(res),
+		trace.WithSampler(trace.TraceIDRatioBased(sampleRatio)),
+	)
+
+	// Set as global tracer provider
+	otel.SetTracerProvider(h.tp)
+
+	return nil
+}
+
+// Shutdown gracefully shuts down the provider. The caller controls
+// the deadline via ctx; this method does not add its own timeout.
+func (h *HoneycombProvider) Shutdown(ctx context.Context) error {
+	if h.tp == nil {
+		return nil
+	}
+
+	if err := h.tp.Shutdown(ctx); err != nil {
+		return fmt.Errorf("error shutting down tracer provider: %w", err)
+	}
+	h.tp = nil
+	return nil
+}
+
+// GetTracerProvider returns the tracer provider, or nil before Init or after a successful Shutdown.
+func (h *HoneycombProvider) GetTracerProvider() *trace.TracerProvider {
+	return h.tp
+}
diff --git a/sampling/export_test.go b/sampling/export_test.go
new file mode 100644
index 0000000..f8d5c98
--- /dev/null
+++ b/sampling/export_test.go
@@ -0,0 +1,8 @@
+package sampling
+
+// ResetGlobal resets the global sampling manager for test isolation.
+func ResetGlobal() {
+	globalMu.Lock()
+	samplingManager = nil
+	globalMu.Unlock()
+}
diff --git a/sampling/manager.go b/sampling/manager.go
new file mode 100644
index 0000000..8676d6f
--- /dev/null
+++ b/sampling/manager.go
@@ -0,0 +1,102 @@
+package sampling
+
+import (
+	"math/rand/v2"
+	"strings"
+	"sync"
+
+	"github.com/bold-minds/obs/config"
+)
+
+// Process-wide sampling manager; globalMu guards the samplingManager pointer.
+var (
+	globalMu        sync.RWMutex
+	samplingManager *Manager
+)
+
+// Manager makes sampling decisions from a SamplingConfig; mu guards config.
+type Manager struct {
+	config config.SamplingConfig
+	mu     sync.RWMutex
+}
+
+// Init installs a global sampling manager built from cfg only when none exists yet;
+// later calls leave the existing manager untouched. Use SetManager to replace it.
+func Init(cfg config.SamplingConfig) {
+	globalMu.Lock()
+	defer globalMu.Unlock()
+	if samplingManager != nil {
+		return
+	}
+	samplingManager = &Manager{config: cfg}
+}
+
+// SetManager unconditionally replaces the global sampling manager with one built from cfg.
+func SetManager(cfg config.SamplingConfig) {
+	globalMu.Lock()
+	samplingManager = &Manager{config: cfg}
+	globalMu.Unlock()
+}
+
+// GetManager returns the global sampling manager, or nil if none has been installed.
+func GetManager() *Manager {
+	globalMu.RLock()
+	current := samplingManager
+	globalMu.RUnlock()
+	return current
+}
+
+// ShouldSample determines if an operation should be sampled based on type and context.
+func (sm *Manager) ShouldSample(operationType, operationName string, isError bool) bool { + if sm == nil { + return true // Default to sampling if not initialized + } + + sm.mu.RLock() + defer sm.mu.RUnlock() + + // Always sample errors if ErrorSampleRate > 0 + if isError && sm.config.ErrorSampleRate > 0 { + return shouldSampleAtRate(sm.config.ErrorSampleRate) + } + + // Check for high-volume endpoints first + for _, endpoint := range sm.config.HighVolumeEndpoints { + if strings.EqualFold(operationName, endpoint) { + return shouldSampleAtRate(sm.config.HighVolumeSampleRate) + } + } + + // Sample based on operation type + switch strings.ToLower(operationType) { + case "db", "store", "query": + return shouldSampleAtRate(sm.config.DatabaseSampleRate) + case "user_event", "analytics": + return shouldSampleAtRate(sm.config.UserEventSampleRate) + default: + return shouldSampleAtRate(sm.config.TraceSampleRate) + } +} + +// shouldSampleAtRate returns true if we should sample at the given rate (1-100). +// Uses the thread-safe global rand from math/rand/v2. +func shouldSampleAtRate(rate int) bool { + if rate <= 0 { + return false + } + if rate >= 100 { + return true + } + return rand.IntN(100) < rate +} + +// UpdateConfig updates the sampling configuration at runtime. 
+func (sm *Manager) UpdateConfig(cfg config.SamplingConfig) { + if sm == nil { + return + } + + sm.mu.Lock() + defer sm.mu.Unlock() + sm.config = cfg +} diff --git a/sampling/manager_test.go b/sampling/manager_test.go new file mode 100644 index 0000000..07a3f11 --- /dev/null +++ b/sampling/manager_test.go @@ -0,0 +1,168 @@ +package sampling + +import ( + "sync" + "testing" + + "github.com/bold-minds/obs/config" +) + +func cfg(trace, errRate, db, userEvent, highVolume int, endpoints ...string) config.SamplingConfig { + return config.SamplingConfig{ + TraceSampleRate: trace, + ErrorSampleRate: errRate, + DatabaseSampleRate: db, + UserEventSampleRate: userEvent, + HighVolumeSampleRate: highVolume, + HighVolumeEndpoints: endpoints, + } +} + +// TestShouldSample_ConcurrentAccess exercises ShouldSample from 100 goroutines +// simultaneously to verify there is no data race on the RNG. +func TestShouldSample_ConcurrentAccess(t *testing.T) { + ResetGlobal() + Init(cfg(50, 100, 50, 50, 10)) + + sm := GetManager() + var wg sync.WaitGroup + for i := range 100 { + wg.Add(1) + go func(i int) { + defer wg.Done() + for range 1000 { + // Just verify no panic/race; discard result. + _ = sm.ShouldSample("http", "someOp", false) + } + }(i) + } + wg.Wait() +} + +// TestShouldSample_ErrorPriority verifies that errors are always sampled when +// ErrorSampleRate is 100, even if TraceSampleRate is 1. +func TestShouldSample_ErrorPriority(t *testing.T) { + ResetGlobal() + Init(cfg(1, 100, 1, 1, 1)) + + sm := GetManager() + for range 100 { + if !sm.ShouldSample("http", "someOp", true) { + t.Fatal("expected error to always be sampled when ErrorSampleRate=100") + } + } +} + +// TestShouldSample_HighVolumeEndpoints verifies endpoints in the list use +// HighVolumeSampleRate (0 here → never sampled) rather than TraceSampleRate (100). +func TestShouldSample_HighVolumeEndpoints(t *testing.T) { + ResetGlobal() + // TraceSampleRate=100 but HighVolumeSampleRate=0 → high-volume endpoints never sampled. 
+ Init(cfg(100, 100, 100, 100, 0, "Find", "FetchScope")) + + sm := GetManager() + for range 100 { + if sm.ShouldSample("http", "Find", false) { + t.Fatal("expected high-volume endpoint to never be sampled when HighVolumeSampleRate=0") + } + if sm.ShouldSample("http", "fetchscope", false) { // case-insensitive + t.Fatal("expected case-insensitive high-volume endpoint to never be sampled") + } + } + // Non-listed endpoint should use TraceSampleRate=100 → always sampled. + for range 20 { + if !sm.ShouldSample("http", "Other", false) { + t.Fatal("expected non-high-volume endpoint to always be sampled when TraceSampleRate=100") + } + } +} + +// TestShouldSample_OperationTypeRouting verifies db/store/query use DatabaseSampleRate +// and user_event/analytics use UserEventSampleRate. +func TestShouldSample_OperationTypeRouting(t *testing.T) { + ResetGlobal() + // DatabaseSampleRate=0, UserEventSampleRate=0, TraceSampleRate=100 + Init(cfg(100, 100, 0, 0, 100)) + + sm := GetManager() + dbTypes := []string{"db", "store", "query", "DB", "STORE", "Query"} + for _, opType := range dbTypes { + for range 20 { + if sm.ShouldSample(opType, "op", false) { + t.Fatalf("expected opType %q to never be sampled when DatabaseSampleRate=0", opType) + } + } + } + + userTypes := []string{"user_event", "analytics", "USER_EVENT", "Analytics"} + for _, opType := range userTypes { + for range 20 { + if sm.ShouldSample(opType, "op", false) { + t.Fatalf("expected opType %q to never be sampled when UserEventSampleRate=0", opType) + } + } + } + + // Default type should use TraceSampleRate=100 → always sampled. + for range 20 { + if !sm.ShouldSample("http", "op", false) { + t.Fatal("expected default opType to always be sampled when TraceSampleRate=100") + } + } +} + +// TestShouldSample_NilManager verifies a nil *Manager returns true (default-sample). 
+func TestShouldSample_NilManager(t *testing.T) { + var sm *Manager + if !sm.ShouldSample("http", "op", false) { + t.Fatal("expected nil manager to return true") + } + if !sm.ShouldSample("http", "op", true) { + t.Fatal("expected nil manager to return true for errors too") + } +} + +// TestShouldSampleAtRate_Boundaries verifies that rate=0 never samples and +// rate=100 always samples. +func TestShouldSampleAtRate_Boundaries(t *testing.T) { + for range 1000 { + if shouldSampleAtRate(0) { + t.Fatal("rate=0 should never sample") + } + } + for range 1000 { + if !shouldSampleAtRate(100) { + t.Fatal("rate=100 should always sample") + } + } +} + +// TestUpdateConfig verifies that UpdateConfig takes effect immediately so that +// subsequent ShouldSample calls respect the new 0% trace rate. +func TestUpdateConfig(t *testing.T) { + ResetGlobal() + Init(cfg(100, 100, 100, 100, 100)) + + sm := GetManager() + + // Confirm we're sampling before the update. + sampled := false + for range 20 { + if sm.ShouldSample("http", "op", false) { + sampled = true + break + } + } + if !sampled { + t.Fatal("expected at least one sample before config update") + } + + // Update to 0% trace rate (and 0% error rate so errors are not sampled via that path). 
+ sm.UpdateConfig(cfg(0, 0, 0, 0, 0)) + + for range 1000 { + if sm.ShouldSample("http", "op", false) { + t.Fatal("expected no sampling after UpdateConfig to 0%") + } + } +} diff --git a/tracing/operation.go b/tracing/operation.go new file mode 100644 index 0000000..cea6b19 --- /dev/null +++ b/tracing/operation.go @@ -0,0 +1,66 @@ +package tracing + +import ( + "context" + "time" + + "go.opentelemetry.io/otel/attribute" + oteltrace "go.opentelemetry.io/otel/trace" +) + +// TrackedOperation provides ultra-concise tracing for business operations +type TrackedOperation struct { + span oteltrace.Span + start time.Time +} + +// TrackOperation starts a tracked operation with intelligent sampling +func TrackOperation(ctx context.Context, operationType, name string, attrs ...attribute.KeyValue) (context.Context, *TrackedOperation) { + // Use sampling-aware span creation (assume not an error initially) + ctx, span := StartSpanWithSampling(ctx, operationType, name, false) + if len(attrs) > 0 { + span.SetAttributes(attrs...) + } + return ctx, &TrackedOperation{ + span: span, + start: time.Now(), + } +} + +// TrackOperationWithError starts a tracked operation, indicating it's for error handling +func TrackOperationWithError(ctx context.Context, operationType, name string, attrs ...attribute.KeyValue) (context.Context, *TrackedOperation) { + // Use sampling-aware span creation with error flag for higher sampling rate + ctx, span := StartSpanWithSampling(ctx, operationType, name, true) + if len(attrs) > 0 { + span.SetAttributes(attrs...) + } + return ctx, &TrackedOperation{ + span: span, + start: time.Now(), + } +} + +// Error records an error and finishes the operation +func (op *TrackedOperation) Error(err error, errorType string, attrs ...attribute.KeyValue) { + TrackErrorAndFinish(op.span, op.start, err, errorType, attrs...) 
+} + +// Success records success metrics and finishes the operation +func (op *TrackedOperation) Success(resultCount int, attrs ...attribute.KeyValue) { + TrackSuccessAndFinish(op.span, op.start, resultCount, attrs...) +} + +// AddAttributes adds attributes to the ongoing operation +func (op *TrackedOperation) AddAttributes(attrs ...attribute.KeyValue) { + op.span.SetAttributes(attrs...) +} + +// Span returns the underlying span for advanced usage +func (op *TrackedOperation) Span() oteltrace.Span { + return op.span +} + +// Duration returns the elapsed time since the operation started +func (op *TrackedOperation) Duration() time.Duration { + return time.Since(op.start) +} diff --git a/tracing/operation_test.go b/tracing/operation_test.go new file mode 100644 index 0000000..8f4d1e7 --- /dev/null +++ b/tracing/operation_test.go @@ -0,0 +1,131 @@ +package tracing + +import ( + "context" + "fmt" + "testing" + "time" + + "go.opentelemetry.io/otel/attribute" +) + +// TestTrackOperation_ReturnsNonNil verifies that TrackOperation returns a non-nil +// TrackedOperation that can be used to record success. +func TestTrackOperation_ReturnsNonNil(t *testing.T) { + ctx := context.Background() + _, op := TrackOperation(ctx, "test", "operation") + if op == nil { + t.Fatal("TrackOperation returned nil operation") + } + + // Verify we can call Success without panicking + defer func() { + if r := recover(); r != nil { + t.Fatalf("Success panicked: %v", r) + } + }() + op.Success(1) +} + +// TestTrackOperation_WithAttributes verifies that TrackOperation accepts multiple +// attributes of different types and does not panic when Success is called. 
+func TestTrackOperation_WithAttributes(t *testing.T) { + ctx := context.Background() + _, op := TrackOperation( + ctx, + "test", + "operation", + attribute.String("key1", "value1"), + attribute.Int("key2", 42), + ) + if op == nil { + t.Fatal("TrackOperation returned nil operation") + } + + // Verify Success does not panic with attributes + defer func() { + if r := recover(); r != nil { + t.Fatalf("Success panicked: %v", r) + } + }() + op.Success(1) +} + +// TestTrackOperationWithError_ReturnsNonNil verifies that TrackOperationWithError +// returns a non-nil TrackedOperation that can record errors. +func TestTrackOperationWithError_ReturnsNonNil(t *testing.T) { + ctx := context.Background() + _, op := TrackOperationWithError(ctx, "test", "operation") + if op == nil { + t.Fatal("TrackOperationWithError returned nil operation") + } + + // Verify we can call Error without panicking + defer func() { + if r := recover(); r != nil { + t.Fatalf("Error panicked: %v", r) + } + }() + op.Error(fmt.Errorf("test"), "test_error") +} + +// TestTrackedOperation_Duration verifies that Duration() returns the elapsed time +// since the operation was started, and that it's at least the time we slept. +func TestTrackedOperation_Duration(t *testing.T) { + ctx := context.Background() + _, op := TrackOperation(ctx, "test", "operation") + if op == nil { + t.Fatal("TrackOperation returned nil operation") + } + + // Sleep for 10ms + time.Sleep(10 * time.Millisecond) + + // Verify Duration is at least 10ms + duration := op.Duration() + if duration < 10*time.Millisecond { + t.Fatalf("Duration() returned %v, expected >= 10ms", duration) + } + + // Clean up the operation + op.Success(1) +} + +// TestTrackedOperation_Span verifies that Span() returns a non-nil span +// for advanced usage. 
+func TestTrackedOperation_Span(t *testing.T) { + ctx := context.Background() + _, op := TrackOperation(ctx, "test", "operation") + if op == nil { + t.Fatal("TrackOperation returned nil operation") + } + + span := op.Span() + if span == nil { + t.Fatal("Span() returned nil") + } + + // Clean up the operation + op.Success(1) +} + +// TestTrackedOperation_AddAttributes verifies that AddAttributes can add additional +// attributes to an ongoing operation without panicking. +func TestTrackedOperation_AddAttributes(t *testing.T) { + ctx := context.Background() + _, op := TrackOperation(ctx, "test", "operation") + if op == nil { + t.Fatal("TrackOperation returned nil operation") + } + + // Verify AddAttributes does not panic + defer func() { + if r := recover(); r != nil { + t.Fatalf("AddAttributes panicked: %v", r) + } + }() + op.AddAttributes(attribute.String("dynamic_key", "dynamic_value")) + + // Clean up the operation + op.Success(1) +} diff --git a/tracing/tracing.go b/tracing/tracing.go new file mode 100644 index 0000000..b9f213c --- /dev/null +++ b/tracing/tracing.go @@ -0,0 +1,224 @@ +package tracing + +import ( + "context" + "fmt" + "runtime" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + oteltrace "go.opentelemetry.io/otel/trace" + "go.opentelemetry.io/otel/trace/noop" + + "github.com/bold-minds/obs/internal" + "github.com/bold-minds/obs/sampling" +) + +// StartSpan creates a new span with the given name and component +func StartSpan(ctx context.Context, component, operation string) (context.Context, oteltrace.Span) { + tracer := GetTracer(component) + return tracer.Start(ctx, fmt.Sprintf("%s.%s", component, operation)) +} + +// StartSpanWithSampling creates a span with intelligent sampling based on operation type +func StartSpanWithSampling(ctx context.Context, component, operation string, isError bool) (context.Context, oteltrace.Span) { + // Check if we should sample this operation + if sm 
:= sampling.GetManager(); sm != nil { + if !sm.ShouldSample(component, operation, isError) { + // Return a proper no-op span so the parent span is not polluted + // with attributes meant for the skipped operation. + return noop.NewTracerProvider().Tracer("").Start(ctx, fmt.Sprintf("%s.%s", component, operation)) + } + } + return StartSpan(ctx, component, operation) +} + +// GetTracer returns a tracer for the given component name +func GetTracer(name string) oteltrace.Tracer { + return otel.Tracer(fmt.Sprintf("obs.%s", name)) +} + +// TraceError records an error on the span with structured attributes +func TraceError(span oteltrace.Span, err error, operation string) { + if err == nil { + return + } + + span.RecordError(err) + span.SetStatus(codes.Error, operation+" failed") + + // Add structured error attributes for analysis + if exErr, ok := err.(internal.Exception); ok { + span.SetAttributes( + attribute.String("error.type", "exception"), + attribute.Int("error.code", int(exErr.Code())), + attribute.String("error.message", exErr.Message()), + attribute.String("error.operation", operation), + attribute.String("error.category", "business_logic"), + ) + } else { + span.SetAttributes( + attribute.String("error.type", "generic"), + attribute.String("error.message", err.Error()), + attribute.String("error.operation", operation), + attribute.String("error.category", "system"), + ) + } +} + +// AddBusinessContext adds common business context attributes to a span +func AddBusinessContext(span oteltrace.Span, scopeKey string, modelCount int, operation string) { + span.SetAttributes( + attribute.String("business.scope_key", scopeKey), + attribute.Int("business.model_count", modelCount), + attribute.String("business.operation", operation), + attribute.String("business.category", "data_operation"), + ) +} + +// AddDatabaseContext adds database operation context to a span. +// The dbSystem parameter should identify the database (e.g. "neo4j", "postgres"). 
+// WARNING: cypher/statement may contain sensitive data — sanitise before passing. +func AddDatabaseContext(span oteltrace.Span, dbSystem, queryName, statement string, paramCount int) { + span.SetAttributes( + attribute.String("db.system", dbSystem), + attribute.String("db.operation", queryName), + attribute.String("db.statement", statement), + attribute.Int("db.param_count", paramCount), + ) +} + +// AddPerformanceMetrics adds performance-related attributes to a span +func AddPerformanceMetrics(span oteltrace.Span, duration time.Duration, resultCount int) { + span.SetAttributes( + attribute.Int64("performance.duration_ms", duration.Milliseconds()), + attribute.Int("performance.result_count", resultCount), + ) +} + +// AddMemoryMetrics adds current memory usage metrics to a span +func AddMemoryMetrics(span oteltrace.Span) { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + span.SetAttributes( + attribute.Int64("memory.heap_bytes", int64(m.HeapInuse)), + attribute.Int64("memory.sys_bytes", int64(m.Sys)), + attribute.Int64("memory.gc_count", int64(m.NumGC)), + ) +} + +// TrackAPIOperation creates a span for API-level operations with rich context +func TrackAPIOperation(ctx context.Context, operation string, inputSize int) (context.Context, oteltrace.Span) { + ctx, span := StartSpan(ctx, "api", operation) + + span.SetAttributes( + attribute.String("api.operation", operation), + attribute.Int("api.input_size_bytes", inputSize), + attribute.String("api.category", "customer_facing"), + ) + + AddMemoryMetrics(span) + return ctx, span +} + +// TrackDatabaseOperation creates a span for database-level operations. 
+func TrackDatabaseOperation(ctx context.Context, operation, queryName string, querySize int) (context.Context, oteltrace.Span) { + ctx, span := StartSpan(ctx, "db", operation) + + span.SetAttributes( + attribute.String("db.operation", operation), + ) + + if queryName != "" { + span.SetAttributes(attribute.String("db.statement.name", queryName)) + } + + if querySize > 0 { + span.SetAttributes(attribute.Int("db.statement.size_bytes", querySize)) + } + + return ctx, span +} + +// TrackDataOperation creates a span for data service operations +func TrackDataOperation(ctx context.Context, operation, scopeKey string, modelCount int, dryRun bool) (context.Context, oteltrace.Span) { + ctx, span := StartSpan(ctx, "data", operation) + + span.SetAttributes( + attribute.String("data.operation", operation), + attribute.String("data.scope_key", scopeKey), + attribute.Int("data.model_count", modelCount), + attribute.Bool("data.dry_run", dryRun), + attribute.String("data.category", "business_logic"), + ) + + return ctx, span +} + +// TrackStoreOperation creates a span for store/database operations +func TrackStoreOperation(ctx context.Context, operation string) (context.Context, oteltrace.Span) { + ctx, span := StartSpan(ctx, "store", operation) + + span.SetAttributes( + attribute.String("store.operation", operation), + attribute.String("store.category", "infrastructure"), + ) + + return ctx, span +} + +// TrackQueryExecution creates a span for individual query execution. +// WARNING: The statement string is recorded as a span attribute. Sanitise it +// before passing if it may contain sensitive data. 
+func TrackQueryExecution(ctx context.Context, queryName, statement string, params map[string]any) (context.Context, oteltrace.Span) { + ctx, span := StartSpan(ctx, "query", "exec") + + span.SetAttributes( + attribute.String("query.name", queryName), + attribute.String("query.statement", statement), + attribute.Int("query.param_count", len(params)), + ) + + return ctx, span +} + +// FinishSpanWithResult finishes a span with timing and result metrics. +// It calls span.End() — do not call End() separately after this. +func FinishSpanWithResult(span oteltrace.Span, duration time.Duration, resultCount int, err error) { + span.SetAttributes( + attribute.Int64("duration_ms", duration.Milliseconds()), + ) + + if err != nil { + span.SetStatus(codes.Error, err.Error()) + span.SetAttributes(attribute.Bool("operation.success", false)) + } else { + span.SetStatus(codes.Ok, "") + span.SetAttributes( + attribute.Bool("operation.success", true), + attribute.Int("result.count", resultCount), + ) + } + + span.End() +} + +// TrackErrorAndFinish is a concise helper for error cases with optional attributes +func TrackErrorAndFinish(span oteltrace.Span, start time.Time, err error, errorType string, attrs ...attribute.KeyValue) { + TraceError(span, err, errorType) + if len(attrs) > 0 { + span.SetAttributes(attrs...) + } + FinishSpanWithResult(span, time.Since(start), 0, err) +} + +// TrackSuccessAndFinish is a concise helper for success cases with metrics +func TrackSuccessAndFinish(span oteltrace.Span, start time.Time, resultCount int, attrs ...attribute.KeyValue) { + if len(attrs) > 0 { + span.SetAttributes(attrs...) 
+ } + FinishSpanWithResult(span, time.Since(start), resultCount, nil) +} diff --git a/tracing/tracing_test.go b/tracing/tracing_test.go new file mode 100644 index 0000000..f050c7d --- /dev/null +++ b/tracing/tracing_test.go @@ -0,0 +1,95 @@ +package tracing + +import ( + "context" + "fmt" + "testing" + + "go.opentelemetry.io/otel/trace/noop" +) + +// TestNoopSpan_IsNotRecording verifies that a span created by the noop tracer +// provider has IsRecording() == false. This proves the fix is mechanically correct: +// when StartSpanWithSampling uses noop.NewTracerProvider(), the returned span will +// not be recording, and therefore will not accumulate attributes meant for the +// skipped operation onto the parent span. +func TestNoopSpan_IsNotRecording(t *testing.T) { + ctx := context.Background() + _, span := noop.NewTracerProvider().Tracer("").Start(ctx, "test.op") + if span.IsRecording() { + t.Fatal("noop span must not be recording") + } +} + +// TestStartSpan_SpanStoredInContext verifies that the context returned by StartSpan +// contains the new span (i.e. the span is a child context carrier). +func TestStartSpan_SpanStoredInContext(t *testing.T) { + ctx := context.Background() + childCtx, span := StartSpan(ctx, "test", "operation") + if span == nil { + t.Fatal("StartSpan returned nil span") + } + // The returned context must be distinct from the parent when a span is active. + if childCtx == ctx { + t.Fatal("StartSpan did not produce a new context") + } +} + +// TestTraceError_Nil verifies that TraceError does not panic when err is nil. +func TestTraceError_Nil(t *testing.T) { + _, span := noop.NewTracerProvider().Tracer("").Start(context.Background(), "test") + defer func() { + if r := recover(); r != nil { + t.Fatalf("TraceError panicked on nil error: %v", r) + } + }() + TraceError(span, nil, "op") +} + +// TestTraceError_GenericError verifies that TraceError does not panic on a plain error. 
+func TestTraceError_GenericError(t *testing.T) { + _, span := noop.NewTracerProvider().Tracer("").Start(context.Background(), "test") + defer func() { + if r := recover(); r != nil { + t.Fatalf("TraceError panicked on generic error: %v", r) + } + }() + TraceError(span, fmt.Errorf("something went wrong"), "op") +} + +// TestAddBusinessContext_NoopSpan verifies AddBusinessContext does not panic with a noop span. +func TestAddBusinessContext_NoopSpan(t *testing.T) { + _, span := noop.NewTracerProvider().Tracer("").Start(context.Background(), "test") + defer func() { + if r := recover(); r != nil { + t.Fatalf("AddBusinessContext panicked: %v", r) + } + }() + AddBusinessContext(span, "scope-key", 42, "read") +} + +// TestAddMemoryMetrics_NoopSpan verifies AddMemoryMetrics does not panic with a noop span. +func TestAddMemoryMetrics_NoopSpan(t *testing.T) { + _, span := noop.NewTracerProvider().Tracer("").Start(context.Background(), "test") + defer func() { + if r := recover(); r != nil { + t.Fatalf("AddMemoryMetrics panicked: %v", r) + } + }() + AddMemoryMetrics(span) +} + +// TestStartSpanWithSampling_NoManager verifies that when no sampling manager is +// initialized, StartSpanWithSampling falls through to StartSpan and returns a +// non-nil span in a new context. +func TestStartSpanWithSampling_NoManager_FallsThrough(t *testing.T) { + // When sampling.GetManager() returns nil the function takes the StartSpan path. + // We cannot reset the sampling singleton from here (it's in package sampling), + // so we just verify the function returns without panicking and produces a span. + ctx := context.Background() + childCtx, span := StartSpanWithSampling(ctx, "test", "op", false) + if span == nil { + t.Fatal("StartSpanWithSampling returned nil span") + } + _ = childCtx +}