Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 114 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# registries

Go library for fetching package metadata from registry APIs. Supports 25 ecosystems with a unified interface.
Go library for fetching package metadata from registry APIs. Supports 25 ecosystems with a unified interface. Also provides sub-packages for HTTP client usage (`client/`) and streaming artifact downloads (`fetch/`).

## Installation

Expand Down Expand Up @@ -281,22 +281,130 @@ if err != nil {
}
```

## HTTP Client
## HTTP Client (`client/`)

The `client` sub-package provides an HTTP client with retry logic, error types, and URL building. You can use it through the top-level `registries` package or import it directly.

The default client includes:

- 30 second timeout
- 5 retries with exponential backoff (50ms base)
- 5 retries with exponential backoff (50ms base, plus up to 10% random jitter)
- Automatic retry on 429 and 5xx responses, as well as on transport-level errors (other 4xx responses are returned immediately)

Custom client:
Custom client via the top-level package:

```go
client := registries.NewClient(
c := registries.NewClient(
registries.WithTimeout(60 * time.Second),
registries.WithMaxRetries(3),
)
pkg, err := registries.FetchPackageFromPURL(ctx, "pkg:npm/lodash", client)
pkg, err := registries.FetchPackageFromPURL(ctx, "pkg:npm/lodash", c)
```

Or import the sub-package directly:

```go
import "github.com/git-pkgs/registries/client"

c := client.NewClient(
client.WithTimeout(60 * time.Second),
client.WithMaxRetries(3),
)

// JSON decoding
var data map[string]any
err := c.GetJSON(ctx, "https://registry.npmjs.org/lodash", &data)

// Raw body
body, err := c.GetBody(ctx, "https://crates.io/api/v1/crates/serde")

// HEAD request
statusCode, err := c.Head(ctx, "https://registry.npmjs.org/lodash")
```

## Artifact Downloads (`fetch/`)

The `fetch` sub-package provides streaming artifact downloads with retry, circuit breaking, DNS caching, and URL resolution.

### Fetching artifacts

```go
import "github.com/git-pkgs/registries/fetch"

f := fetch.NewFetcher(
fetch.WithMaxRetries(3),
fetch.WithBaseDelay(500 * time.Millisecond),
fetch.WithUserAgent("my-app/1.0"),
)

artifact, err := f.Fetch(ctx, "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz")
if err != nil {
log.Fatal(err)
}
defer artifact.Body.Close()

// artifact.Body is an io.ReadCloser
// artifact.Size is the content length (-1 if unknown)
// artifact.ContentType and artifact.ETag are also available
io.Copy(dst, artifact.Body)
```

The fetcher uses DNS caching (5-minute refresh), connection pooling, and a 5-minute timeout suited for large artifacts. It retries on rate limits and server errors with exponential backoff and jitter.

### Authentication

Pass a function that returns auth headers per URL:

```go
f := fetch.NewFetcher(
fetch.WithAuthFunc(func(url string) (string, string) {
if strings.Contains(url, "npm.pkg.github.com") {
return "Authorization", "Bearer " + token
}
return "", ""
}),
)
```

### Circuit breaker

Wrap a fetcher with per-host circuit breakers to avoid hammering a failing registry. The breaker trips after 5 consecutive failures and resets with exponential backoff (30s initial, 5min max).

```go
f := fetch.NewFetcher()
cbf := fetch.NewCircuitBreakerFetcher(f)

// Same interface as Fetcher
artifact, err := cbf.Fetch(ctx, url)

// Check breaker states for health monitoring
states := cbf.GetBreakerState()
// map[string]string{"registry.npmjs.org": "closed", "crates.io": "open"}
```

### URL resolution

The resolver maps ecosystem/name/version to download URLs and filenames. It uses each registry's `URLBuilder` when available, and falls back to hardcoded URL patterns for common ecosystems (npm, cargo, gem, golang, hex, pub, maven, nuget). For ecosystems with dynamic URLs (like PyPI), it fetches version metadata to find the download link.

```go
resolver := fetch.NewResolver()

// Register a registry for URL building and metadata fallback
reg, _ := registries.New("cargo", "", nil)
resolver.RegisterRegistry(reg)

info, err := resolver.Resolve(ctx, "cargo", "serde", "1.0.0")
// info.URL = "https://static.crates.io/crates/serde/serde-1.0.0.crate"
// info.Filename = "serde-1.0.0.crate"
// info.Integrity may be populated for metadata-resolved URLs
```

The resolver also works without a registered registry for ecosystems with predictable URL patterns:

```go
resolver := fetch.NewResolver()
info, _ := resolver.Resolve(ctx, "npm", "lodash", "4.17.21")
// info.URL = "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"
```

## Private Registries
Expand Down
207 changes: 207 additions & 0 deletions client/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Package client provides an HTTP client with retry logic for registry APIs.
package client

import (
"context"
"encoding/json"
"io"
"math"
"math/rand"
"net/http"
"strconv"
"time"
)

// RateLimiter controls request pacing.
//
// GetBody calls Wait before every attempt, including retries, and aborts
// with the returned error if Wait fails.
type RateLimiter interface {
// Wait receives the request context. Returning a non-nil error stops the
// request; returning nil lets the attempt proceed.
Wait(ctx context.Context) error
}

// Client is an HTTP client with retry logic for registry APIs.
//
// GetBody (and the helpers built on it) retries with exponential backoff;
// Head performs a single request with no retries.
type Client struct {
// HTTPClient performs the underlying requests. It must be non-nil.
HTTPClient *http.Client
// UserAgent is sent as the User-Agent header on every request.
UserAgent string
// MaxRetries is the number of additional attempts GetBody makes after the
// first one fails with a retryable error.
MaxRetries int
// BaseDelay is the backoff before the first retry; it doubles on each
// subsequent retry.
BaseDelay time.Duration
// RateLimiter, when non-nil, is consulted before each GetBody attempt.
RateLimiter RateLimiter
}

// DefaultClient builds a Client with the package defaults: a 30 second HTTP
// timeout, the "registries" user agent, 5 retries and a 50ms base backoff
// delay. No rate limiter is installed.
func DefaultClient() *Client {
	c := new(Client)
	c.HTTPClient = &http.Client{Timeout: 30 * time.Second}
	c.UserAgent = "registries"
	c.MaxRetries = 5
	c.BaseDelay = 50 * time.Millisecond
	return c
}

// GetJSON retrieves url via GetBody and unmarshals the JSON payload into v.
// It returns the fetch error if the request fails, otherwise the result of
// json.Unmarshal.
func (c *Client) GetJSON(ctx context.Context, url string, v any) error {
	raw, err := c.GetBody(ctx, url)
	if err == nil {
		err = json.Unmarshal(raw, v)
	}
	return err
}

// GetBody fetches a URL with GET and returns the raw response body.
//
// The request is attempted once plus up to MaxRetries retries. Before each
// retry it sleeps for BaseDelay * 2^(retry-1) plus up to 10% random jitter,
// returning ctx.Err() if the context ends during the wait. If a RateLimiter
// is set, its Wait is called before every attempt and a Wait error aborts
// the call.
//
// An *HTTPError with status 429 or >= 500 is retried, as is any error that
// is not an *HTTPError. Every other *HTTPError (404 and the remaining 4xx
// codes) is returned immediately. When all attempts fail, the error from the
// last attempt is returned.
func (c *Client) GetBody(ctx context.Context, url string) ([]byte, error) {
	// A negative MaxRetries would skip the loop entirely and yield (nil, nil),
	// i.e. no body and no error. Treat it as "no retries" so that one attempt
	// is always made.
	retries := c.MaxRetries
	if retries < 0 {
		retries = 0
	}

	var lastErr error

	for attempt := 0; attempt <= retries; attempt++ {
		if attempt > 0 {
			// Exponential backoff with a little jitter so that concurrent
			// clients do not retry in lockstep.
			delay := c.BaseDelay * time.Duration(math.Pow(2, float64(attempt-1)))
			delay += time.Duration(float64(delay) * (rand.Float64() * 0.1))

			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-time.After(delay):
			}
		}

		if c.RateLimiter != nil {
			if err := c.RateLimiter.Wait(ctx); err != nil {
				return nil, err
			}
		}

		body, err := c.doRequest(ctx, url)
		if err == nil {
			return body, nil
		}
		lastErr = err

		// Only 429 and 5xx HTTP errors are worth retrying; any other HTTP
		// status (including 404) is final. Errors that are not *HTTPError
		// fall through and are retried.
		var httpErr *HTTPError
		if isHTTPError(err, &httpErr) && httpErr.StatusCode != 429 && httpErr.StatusCode < 500 {
			return nil, err
		}
	}

	return nil, lastErr
}

// doRequest performs a single GET of url and returns the full response body.
//
// The request carries the client's User-Agent and an "Accept:
// application/json" header. The body is always read in full. A status below
// 400 yields the body; a 429 whose Retry-After header parses as an integer
// yields a *RateLimitError carrying that number of seconds; every other
// status >= 400 yields an *HTTPError with the status code, URL and body text.
func (c *Client) doRequest(ctx context.Context, url string) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", c.UserAgent)
	req.Header.Set("Accept", "application/json")

	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer func() { _ = resp.Body.Close() }()

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// Happy path: anything below 400 is handed back as-is.
	if resp.StatusCode < http.StatusBadRequest {
		return payload, nil
	}

	// A rate-limit response with a numeric Retry-After gets its own error
	// type; a missing or non-integer header falls through to HTTPError.
	if resp.StatusCode == http.StatusTooManyRequests {
		if header := resp.Header.Get("Retry-After"); header != "" {
			if secs, convErr := strconv.Atoi(header); convErr == nil {
				return nil, &RateLimitError{RetryAfter: secs}
			}
		}
	}

	return nil, &HTTPError{
		StatusCode: resp.StatusCode,
		URL:        url,
		Body:       string(payload),
	}
}

// isHTTPError reports whether err is itself an *HTTPError and, if so, stores
// it in *target. Only a direct type match is checked; wrapped errors are not
// unwrapped.
func isHTTPError(err error, target **HTTPError) bool {
	httpErr, ok := err.(*HTTPError)
	if !ok {
		return false
	}
	*target = httpErr
	return true
}

// GetText retrieves url via GetBody and returns the payload as a string.
// On failure it returns the empty string together with the fetch error.
func (c *Client) GetText(ctx context.Context, url string) (string, error) {
	var text string
	raw, err := c.GetBody(ctx, url)
	if err == nil {
		text = string(raw)
	}
	return text, err
}

// Head issues a single HEAD request for url and reports the response status
// code. Unlike GetBody it does not retry and does not consult the rate
// limiter; any status code, including 4xx/5xx, is returned with a nil error.
// A non-nil error means the request could not be built or sent.
func (c *Client) Head(ctx context.Context, url string) (int, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
	if err != nil {
		return 0, err
	}
	req.Header.Set("User-Agent", c.UserAgent)

	resp, err := c.HTTPClient.Do(req)
	if err != nil {
		return 0, err
	}
	// Close errors are deliberately ignored: the status is all we need.
	defer func() { _ = resp.Body.Close() }()

	return resp.StatusCode, nil
}

// WithRateLimiter returns a shallow copy of the client that uses rl as its
// rate limiter. The receiver is left untouched; the copy shares the same
// underlying *http.Client.
func (c *Client) WithRateLimiter(rl RateLimiter) *Client {
	clone := *c
	clone.RateLimiter = rl
	return &clone
}

// WithUserAgent returns a shallow copy of the client that sends ua as its
// User-Agent. The receiver is left untouched; the copy shares the same
// underlying *http.Client.
func (c *Client) WithUserAgent(ua string) *Client {
	clone := *c
	clone.UserAgent = ua
	return &clone
}

// Option configures a Client. NewClient applies options in the order given,
// on top of the values returned by DefaultClient.
type Option func(*Client)

// WithTimeout returns an Option that sets the Timeout of the client's
// underlying HTTPClient to d. The client's HTTPClient must be non-nil, which
// NewClient guarantees by starting from DefaultClient.
func WithTimeout(d time.Duration) Option {
	return Option(func(target *Client) {
		target.HTTPClient.Timeout = d
	})
}

// WithMaxRetries returns an Option that sets the client's MaxRetries to n.
// The value is stored as given, without validation.
func WithMaxRetries(n int) Option {
	return Option(func(target *Client) {
		target.MaxRetries = n
	})
}

// NewClient starts from DefaultClient and applies each option in the order
// supplied, so later options override earlier ones.
func NewClient(opts ...Option) *Client {
	c := DefaultClient()
	for i := range opts {
		opts[i](c)
	}
	return c
}
Loading