Skip to content
72 changes: 72 additions & 0 deletions sdk/go/ai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This package provides AI/LLM capabilities for the AgentField Go SDK, supporting
- ✅ **Type-Safe**: Automatic conversion from Go structs to JSON schemas
- ✅ **Functional Options**: Clean, idiomatic Go API with functional options pattern
- ✅ **Automatic Configuration**: Reads from environment variables by default
- ✅ **Rate Limiting**: Built-in exponential backoff and circuit breaker for production resilience (see Rate Limiting section below)

## Quick Start

Expand Down Expand Up @@ -233,12 +234,83 @@ if err := response.Into(&result); err != nil {
}
```

## Rate Limiting

The Go SDK includes built-in rate limiting with exponential backoff and circuit breaker patterns for production resilience.

### Configuration

Rate limiting is **enabled by default** with sensible defaults:

```go
config := ai.DefaultConfig()
// Uses default rate limiting:
// - MaxRetries: 5
// - BaseDelay: 1 second
// - MaxDelay: 30 seconds
// - JitterFactor: 0.1
// - CircuitBreakerThreshold: 5 consecutive failures
// - CircuitBreakerTimeout: 60 seconds
```

### Custom Configuration

```go
config := &ai.Config{
APIKey: os.Getenv("OPENAI_API_KEY"),
Model: "gpt-4o",

// Custom rate limiting
RateLimitMaxRetries: 10,
RateLimitBaseDelay: 500 * time.Millisecond,
RateLimitMaxDelay: 60 * time.Second,
RateLimitJitterFactor: 0.2,
CircuitBreakerThreshold: 3,
CircuitBreakerTimeout: 30 * time.Second,
}
```

### Disable Rate Limiting

```go
config := ai.DefaultConfig()
config.DisableRateLimiter = true // Disable rate limiting completely
```

### How It Works

- **Exponential Backoff**: Delays increase exponentially (1s → 2s → 4s → 8s...)
- **Jitter**: Adds randomness to prevent thundering herd
- **Circuit Breaker**: Opens after N consecutive failures, prevents cascade
- **Automatic Detection**: Identifies rate limit errors from status codes and error messages

### Thread Safety

The AI client and rate limiter are safe for concurrent use by multiple goroutines:

```go
client, _ := agent.New(config) // handle the error in real code

// Safe to call from multiple goroutines
var wg sync.WaitGroup
for i := 0; i < 10; i++ {
wg.Add(1)
go func() {
defer wg.Done()
response, err := client.AI(ctx, "Hello")
// Process response
}()
}
wg.Wait()
```

## Performance Considerations

1. **Connection Pooling**: The HTTP client uses connection pooling for efficient requests
2. **Context Cancellation**: Always use contexts with timeouts for AI calls
3. **Streaming**: Use streaming for long responses to improve perceived latency
4. **Model Selection**: Choose appropriate models for your use case (faster models = lower latency)
5. **Rate Limiting**: Built-in rate limiting handles API throttling automatically

## Examples

Expand Down
93 changes: 69 additions & 24 deletions sdk/go/ai/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ import (

// Client provides AI/LLM capabilities using OpenAI or OpenRouter API.
// Client provides AI/LLM capabilities using OpenAI or OpenRouter API.
// The diff paste had duplicated old/new field lines; this is the
// post-change struct with the rate limiter field added.
type Client struct {
	config      *Config       // validated configuration (API key, model, timeouts, rate-limit settings)
	httpClient  *http.Client  // shared HTTP client; Timeout taken from config
	rateLimiter *RateLimiter  // nil when config.DisableRateLimiter is true; otherwise wraps requests with retry/backoff
}

// NewClient creates a new AI client with the given configuration.
Expand All @@ -26,8 +27,22 @@ func NewClient(config *Config) (*Client, error) {
return nil, fmt.Errorf("invalid config: %w", err)
}

// Initialize rate limiter if not disabled
var rateLimiter *RateLimiter
if !config.DisableRateLimiter {
rateLimiter = NewRateLimiter(RateLimiterConfig{
MaxRetries: config.RateLimitMaxRetries,
BaseDelay: config.RateLimitBaseDelay,
MaxDelay: config.RateLimitMaxDelay,
JitterFactor: config.RateLimitJitterFactor,
CircuitBreakerThreshold: config.CircuitBreakerThreshold,
CircuitBreakerTimeout: config.CircuitBreakerTimeout,
})
}

return &Client{
config: config,
config: config,
rateLimiter: rateLimiter,
httpClient: &http.Client{
Timeout: config.Timeout,
},
Expand All @@ -53,7 +68,13 @@ func (c *Client) Complete(ctx context.Context, prompt string, opts ...Option) (*
}
}

// Make HTTP request
// Make HTTP request with rate limiting
if c.rateLimiter != nil {
return c.rateLimiter.ExecuteWithRetry(ctx, func() (*Response, error) {
return c.doRequest(ctx, req)
})
}

return c.doRequest(ctx, req)
}

Expand All @@ -73,6 +94,13 @@ func (c *Client) CompleteWithMessages(ctx context.Context, messages []Message, o
}
}

// Make HTTP request with rate limiting
if c.rateLimiter != nil {
return c.rateLimiter.ExecuteWithRetry(ctx, func() (*Response, error) {
return c.doRequest(ctx, req)
})
}

return c.doRequest(ctx, req)
}

Expand Down Expand Up @@ -144,33 +172,50 @@ func (c *Client) doRequest(ctx context.Context, req *Request) (*Response, error)
// StreamComplete makes a streaming chat completion request.
// Returns a channel of response chunks.
// StreamComplete builds a streaming chat-completion request for the given
// prompt, applies caller options, and dispatches it — through the rate
// limiter when one is configured — returning a chunk channel and an error
// channel.
func (c *Client) StreamComplete(ctx context.Context, prompt string, opts ...Option) (<-chan StreamChunk, <-chan error) {
// Build request with streaming enabled. WithStream is appended after the
// caller's options so it is applied last and streaming cannot be disabled.
opts = append(opts, WithStream())
req := &Request{
Messages: []Message{
{Role: "user", Content: prompt},
},
Model: c.config.Model,
// Pointers into config: defaults that options may override via opt(req).
Temperature: &c.config.Temperature,
MaxTokens: &c.config.MaxTokens,
Stream: true,
}

// Apply options
for _, opt := range opts {
// If option application fails, return error channels immediately
if err := opt(req); err != nil {
// Synchronous failure path: hand back an already-closed chunk channel
// and a buffered (capacity 1) error channel carrying the wrapped
// error, so the caller can range/receive without blocking.
chunkCh := make(chan StreamChunk)
errCh := make(chan error, 1)
close(chunkCh)
errCh <- fmt.Errorf("apply option: %w", err)
close(errCh)
return chunkCh, errCh
}
}

// Use rate limiter if enabled
if c.rateLimiter != nil {
// NOTE(review): ExecuteStreamWithRetry is defined elsewhere; presumably it
// retries establishing the stream with backoff/circuit-breaker semantics —
// confirm it does not re-deliver chunks from a partially consumed stream.
return c.rateLimiter.ExecuteStreamWithRetry(ctx, func() (<-chan StreamChunk, <-chan error) {
return c.doStreamRequest(ctx, req)
})
}

return c.doStreamRequest(ctx, req)
}

// doStreamRequest executes the streaming HTTP request.
func (c *Client) doStreamRequest(ctx context.Context, req *Request) (<-chan StreamChunk, <-chan error) {
chunkCh := make(chan StreamChunk)
errCh := make(chan error, 1)

go func() {
defer close(chunkCh)
defer close(errCh)

// Build request with streaming enabled
opts = append(opts, WithStream())
req := &Request{
Messages: []Message{
{Role: "user", Content: prompt},
},
Model: c.config.Model,
Temperature: &c.config.Temperature,
MaxTokens: &c.config.MaxTokens,
Stream: true,
}

// Apply options
for _, opt := range opts {
if err := opt(req); err != nil {
errCh <- fmt.Errorf("apply option: %w", err)
return
}
}

// Marshal request
body, err := json.Marshal(req)
if err != nil {
Expand Down
18 changes: 18 additions & 0 deletions sdk/go/ai/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ type Config struct {

// Optional: Site name for OpenRouter rankings
SiteName string

// Rate Limiter Configuration
RateLimitMaxRetries int // Maximum number of retry attempts (default: 5)
RateLimitBaseDelay time.Duration // Base delay for exponential backoff (default: 1s)
RateLimitMaxDelay time.Duration // Maximum delay between retries (default: 30s)
RateLimitJitterFactor float64 // Jitter factor 0.0-1.0 (default: 0.1)
CircuitBreakerThreshold int // Consecutive failures before opening circuit (default: 5)
CircuitBreakerTimeout time.Duration // Time before attempting to close circuit (default: 60s)
DisableRateLimiter bool // Disable rate limiting completely (default: false)
}

// DefaultConfig returns a Config with sensible defaults.
Expand Down Expand Up @@ -67,6 +76,15 @@ func DefaultConfig() *Config {
Temperature: 0.7,
MaxTokens: 4096,
Timeout: 30 * time.Second,

// Rate Limiter Defaults
RateLimitMaxRetries: 5,
RateLimitBaseDelay: time.Second,
RateLimitMaxDelay: 30 * time.Second,
RateLimitJitterFactor: 0.1,
CircuitBreakerThreshold: 5,
CircuitBreakerTimeout: 60 * time.Second,
DisableRateLimiter: false,
}
}

Expand Down
Loading