Skip to content
72 changes: 72 additions & 0 deletions sdk/go/ai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ This package provides AI/LLM capabilities for the AgentField Go SDK, supporting
- ✅ **Type-Safe**: Automatic conversion from Go structs to JSON schemas
- ✅ **Functional Options**: Clean, idiomatic Go API with functional options pattern
- ✅ **Automatic Configuration**: Reads from environment variables by default
- ✅ **Rate Limiting**: Built-in exponential backoff and circuit breaker for production resilience (see Rate Limiting section below)

## Quick Start

Expand Down Expand Up @@ -233,12 +234,83 @@ if err := response.Into(&result); err != nil {
}
```

## Rate Limiting

The Go SDK includes built-in rate limiting with exponential backoff and circuit breaker patterns for production resilience.

### Configuration

Rate limiting is **enabled by default** with sensible defaults:

```go
config := ai.DefaultConfig()
// Uses default rate limiting:
// - MaxRetries: 5
// - BaseDelay: 1 second
// - MaxDelay: 30 seconds
// - JitterFactor: 0.1
// - CircuitBreakerThreshold: 5 consecutive failures
// - CircuitBreakerTimeout: 60 seconds
```

### Custom Configuration

```go
config := &ai.Config{
APIKey: os.Getenv("OPENAI_API_KEY"),
Model: "gpt-4o",

// Custom rate limiting
RateLimitMaxRetries: 10,
RateLimitBaseDelay: 500 * time.Millisecond,
RateLimitMaxDelay: 60 * time.Second,
RateLimitJitterFactor: 0.2,
CircuitBreakerThreshold: 3,
CircuitBreakerTimeout: 30 * time.Second,
}
```

### Disable Rate Limiting

```go
config := ai.DefaultConfig()
config.DisableRateLimiter = true // Disable rate limiting completely
```

### How It Works

- **Exponential Backoff**: Delays increase exponentially (1s → 2s → 4s → 8s...)
- **Jitter**: Adds randomness to prevent thundering herd
- **Circuit Breaker**: Opens after N consecutive failures, prevents cascade
- **Automatic Detection**: Identifies rate limit errors from status codes and error messages

### Thread Safety

The AI client and rate limiter are safe for concurrent use by multiple goroutines:

```go
client, _ := agent.New(config) // handle the error in real code

// Safe to call from multiple goroutines
var wg sync.WaitGroup
for i := 0; i < 10; i++ {
wg.Add(1)
go func() {
defer wg.Done()
response, err := client.AI(ctx, "Hello")
// Process response
}()
}
wg.Wait()
```

## Performance Considerations

1. **Connection Pooling**: The HTTP client uses connection pooling for efficient requests
2. **Context Cancellation**: Always use contexts with timeouts for AI calls
3. **Streaming**: Use streaming for long responses to improve perceived latency
4. **Model Selection**: Choose appropriate models for your use case (faster models = lower latency)
5. **Rate Limiting**: Built-in rate limiting handles API throttling automatically

## Examples

Expand Down
93 changes: 69 additions & 24 deletions sdk/go/ai/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ import (

// Client provides AI/LLM capabilities using OpenAI or OpenRouter API.
// Client provides AI/LLM capabilities using OpenAI or OpenRouter API.
// The diff paste had duplicated old/new field lines; this is the
// post-change struct with the rate limiter field added.
type Client struct {
	config      *Config       // validated configuration (API key, model, timeouts, rate-limit settings)
	httpClient  *http.Client  // shared HTTP client; Timeout taken from config
	rateLimiter *RateLimiter  // nil when config.DisableRateLimiter is true; otherwise wraps requests with retry/backoff
}

// NewClient creates a new AI client with the given configuration.
Expand All @@ -26,8 +27,22 @@ func NewClient(config *Config) (*Client, error) {
return nil, fmt.Errorf("invalid config: %w", err)
}

// Initialize rate limiter if not disabled
var rateLimiter *RateLimiter
if !config.DisableRateLimiter {
rateLimiter = NewRateLimiter(RateLimiterConfig{
MaxRetries: config.RateLimitMaxRetries,
BaseDelay: config.RateLimitBaseDelay,
MaxDelay: config.RateLimitMaxDelay,
JitterFactor: config.RateLimitJitterFactor,
CircuitBreakerThreshold: config.CircuitBreakerThreshold,
CircuitBreakerTimeout: config.CircuitBreakerTimeout,
})
}

return &Client{
config: config,
config: config,
rateLimiter: rateLimiter,
httpClient: &http.Client{
Timeout: config.Timeout,
},
Expand All @@ -53,7 +68,13 @@ func (c *Client) Complete(ctx context.Context, prompt string, opts ...Option) (*
}
}

// Make HTTP request
// Make HTTP request with rate limiting
if c.rateLimiter != nil {
return c.rateLimiter.ExecuteWithRetry(ctx, func() (*Response, error) {
return c.doRequest(ctx, req)
})
}

return c.doRequest(ctx, req)
}

Expand All @@ -73,6 +94,13 @@ func (c *Client) CompleteWithMessages(ctx context.Context, messages []Message, o
}
}

// Make HTTP request with rate limiting
if c.rateLimiter != nil {
return c.rateLimiter.ExecuteWithRetry(ctx, func() (*Response, error) {
return c.doRequest(ctx, req)
})
}

return c.doRequest(ctx, req)
}

Expand Down Expand Up @@ -144,33 +172,50 @@ func (c *Client) doRequest(ctx context.Context, req *Request) (*Response, error)
// StreamComplete makes a streaming chat completion request.
// Returns a channel of response chunks.
// StreamComplete builds a streaming chat-completion request for the given
// prompt, applies caller options, and dispatches it — through the rate
// limiter when one is configured — returning a chunk channel and an error
// channel.
func (c *Client) StreamComplete(ctx context.Context, prompt string, opts ...Option) (<-chan StreamChunk, <-chan error) {
// Build request with streaming enabled. WithStream is appended after the
// caller's options so it is applied last and streaming cannot be disabled.
opts = append(opts, WithStream())
req := &Request{
Messages: []Message{
{Role: "user", Content: prompt},
},
Model: c.config.Model,
// Pointers into config: defaults that options may override via opt(req).
Temperature: &c.config.Temperature,
MaxTokens: &c.config.MaxTokens,
Stream: true,
}

// Apply options
for _, opt := range opts {
// If option application fails, return error channels immediately
if err := opt(req); err != nil {
// Synchronous failure path: hand back an already-closed chunk channel
// and a buffered (capacity 1) error channel carrying the wrapped
// error, so the caller can range/receive without blocking.
chunkCh := make(chan StreamChunk)
errCh := make(chan error, 1)
close(chunkCh)
errCh <- fmt.Errorf("apply option: %w", err)
close(errCh)
return chunkCh, errCh
}
}

// Use rate limiter if enabled
if c.rateLimiter != nil {
// NOTE(review): ExecuteStreamWithRetry is defined elsewhere; presumably it
// retries establishing the stream with backoff/circuit-breaker semantics —
// confirm it does not re-deliver chunks from a partially consumed stream.
return c.rateLimiter.ExecuteStreamWithRetry(ctx, func() (<-chan StreamChunk, <-chan error) {
return c.doStreamRequest(ctx, req)
})
}

return c.doStreamRequest(ctx, req)
}

// doStreamRequest executes the streaming HTTP request.
func (c *Client) doStreamRequest(ctx context.Context, req *Request) (<-chan StreamChunk, <-chan error) {
chunkCh := make(chan StreamChunk)
errCh := make(chan error, 1)

go func() {
defer close(chunkCh)
defer close(errCh)

// Build request with streaming enabled
opts = append(opts, WithStream())
req := &Request{
Messages: []Message{
{Role: "user", Content: prompt},
},
Model: c.config.Model,
Temperature: &c.config.Temperature,
MaxTokens: &c.config.MaxTokens,
Stream: true,
}

// Apply options
for _, opt := range opts {
if err := opt(req); err != nil {
errCh <- fmt.Errorf("apply option: %w", err)
return
}
}

// Marshal request
body, err := json.Marshal(req)
if err != nil {
Expand Down
18 changes: 18 additions & 0 deletions sdk/go/ai/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ type Config struct {

// Optional: Site name for OpenRouter rankings
SiteName string

// Rate Limiter Configuration
RateLimitMaxRetries int // Maximum number of retry attempts (default: 5)
RateLimitBaseDelay time.Duration // Base delay for exponential backoff (default: 1s)
RateLimitMaxDelay time.Duration // Maximum delay between retries (default: 30s)
RateLimitJitterFactor float64 // Jitter factor 0.0-1.0 (default: 0.1)
CircuitBreakerThreshold int // Consecutive failures before opening circuit (default: 5)
CircuitBreakerTimeout time.Duration // Time before attempting to close circuit (default: 60s)
DisableRateLimiter bool // Disable rate limiting completely (default: false)
}

// DefaultConfig returns a Config with sensible defaults.
Expand Down Expand Up @@ -67,6 +76,15 @@ func DefaultConfig() *Config {
Temperature: 0.7,
MaxTokens: 4096,
Timeout: 30 * time.Second,

// Rate Limiter Defaults
RateLimitMaxRetries: 5,
RateLimitBaseDelay: time.Second,
RateLimitMaxDelay: 30 * time.Second,
RateLimitJitterFactor: 0.1,
CircuitBreakerThreshold: 5,
CircuitBreakerTimeout: 60 * time.Second,
DisableRateLimiter: false,
}
}

Expand Down
Loading