Skip to content

Commit c9a4446

Browse files
authored
Merge pull request #8 from git-pkgs/extract-fetch-client
Add client/ and fetch/ sub-packages
2 parents 230096d + 42b1a66 commit c9a4446

15 files changed

Lines changed: 1798 additions & 354 deletions

README.md

Lines changed: 114 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# registries
22

3-
Go library for fetching package metadata from registry APIs. Supports 25 ecosystems with a unified interface.
3+
Go library for fetching package metadata from registry APIs. Supports 25 ecosystems with a unified interface. Also provides sub-packages for HTTP client usage (`client/`) and streaming artifact downloads (`fetch/`).
44

55
## Installation
66

@@ -281,22 +281,130 @@ if err != nil {
281281
}
282282
```
283283

284-
## HTTP Client
284+
## HTTP Client (`client/`)
285+
286+
The `client` sub-package provides an HTTP client with retry logic, error types, and URL building. You can use it through the top-level `registries` package or import it directly.
285287

286288
The default client includes:
287289

288290
- 30 second timeout
289-
- 5 retries with exponential backoff (50ms base)
291+
- 5 retries with exponential backoff (50ms base, up to 10% jitter)
290292
- Automatic retry on 429 and 5xx responses
291293

292-
Custom client:
294+
Custom client via the top-level package:
293295

294296
```go
295-
client := registries.NewClient(
297+
c := registries.NewClient(
296298
registries.WithTimeout(60 * time.Second),
297299
registries.WithMaxRetries(3),
298300
)
299-
pkg, err := registries.FetchPackageFromPURL(ctx, "pkg:npm/lodash", client)
301+
pkg, err := registries.FetchPackageFromPURL(ctx, "pkg:npm/lodash", c)
302+
```
303+
304+
Or import the sub-package directly:
305+
306+
```go
307+
import "github.com/git-pkgs/registries/client"
308+
309+
c := client.NewClient(
310+
client.WithTimeout(60 * time.Second),
311+
client.WithMaxRetries(3),
312+
)
313+
314+
// JSON decoding
315+
var data map[string]any
316+
err := c.GetJSON(ctx, "https://registry.npmjs.org/lodash", &data)
317+
318+
// Raw body
319+
body, err := c.GetBody(ctx, "https://crates.io/api/v1/crates/serde")
320+
321+
// HEAD request
322+
statusCode, err := c.Head(ctx, "https://registry.npmjs.org/lodash")
323+
```
324+
325+
## Artifact Downloads (`fetch/`)
326+
327+
The `fetch` sub-package provides streaming artifact downloads with retry, circuit breaking, DNS caching, and URL resolution.
328+
329+
### Fetching artifacts
330+
331+
```go
332+
import "github.com/git-pkgs/registries/fetch"
333+
334+
f := fetch.NewFetcher(
335+
fetch.WithMaxRetries(3),
336+
fetch.WithBaseDelay(500 * time.Millisecond),
337+
fetch.WithUserAgent("my-app/1.0"),
338+
)
339+
340+
artifact, err := f.Fetch(ctx, "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz")
341+
if err != nil {
342+
log.Fatal(err)
343+
}
344+
defer artifact.Body.Close()
345+
346+
// artifact.Body is an io.ReadCloser
347+
// artifact.Size is the content length (-1 if unknown)
348+
// artifact.ContentType and artifact.ETag are also available
349+
io.Copy(dst, artifact.Body)
350+
```
351+
352+
The fetcher uses DNS caching (5-minute refresh), connection pooling, and a 5-minute timeout suited for large artifacts. It retries on rate limits and server errors with exponential backoff and jitter.
353+
354+
### Authentication
355+
356+
Pass a function that returns auth headers per URL:
357+
358+
```go
359+
f := fetch.NewFetcher(
360+
fetch.WithAuthFunc(func(url string) (string, string) {
361+
if strings.Contains(url, "npm.pkg.github.com") {
362+
return "Authorization", "Bearer " + token
363+
}
364+
return "", ""
365+
}),
366+
)
367+
```
368+
369+
### Circuit breaker
370+
371+
Wrap a fetcher with per-host circuit breakers to avoid hammering a failing registry. The breaker trips after 5 consecutive failures and resets with exponential backoff (30s initial, 5min max).
372+
373+
```go
374+
f := fetch.NewFetcher()
375+
cbf := fetch.NewCircuitBreakerFetcher(f)
376+
377+
// Same interface as Fetcher
378+
artifact, err := cbf.Fetch(ctx, url)
379+
380+
// Check breaker states for health monitoring
381+
states := cbf.GetBreakerState()
382+
// map[string]string{"registry.npmjs.org": "closed", "crates.io": "open"}
383+
```
384+
385+
### URL resolution
386+
387+
The resolver maps ecosystem/name/version to download URLs and filenames. It uses each registry's `URLBuilder` when available, and falls back to hardcoded URL patterns for common ecosystems (npm, cargo, gem, golang, hex, pub, maven, nuget). For ecosystems with dynamic URLs (like PyPI), it fetches version metadata to find the download link.
388+
389+
```go
390+
resolver := fetch.NewResolver()
391+
392+
// Register a registry for URL building and metadata fallback
393+
reg, _ := registries.New("cargo", "", nil)
394+
resolver.RegisterRegistry(reg)
395+
396+
info, err := resolver.Resolve(ctx, "cargo", "serde", "1.0.0")
397+
// info.URL = "https://static.crates.io/crates/serde/serde-1.0.0.crate"
398+
// info.Filename = "serde-1.0.0.crate"
399+
// info.Integrity may be populated for metadata-resolved URLs
400+
```
401+
402+
The resolver also works without a registered registry for ecosystems with predictable URL patterns:
403+
404+
```go
405+
resolver := fetch.NewResolver()
406+
info, _ := resolver.Resolve(ctx, "npm", "lodash", "4.17.21")
407+
// info.URL = "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz"
300408
```
301409

302410
## Private Registries

client/client.go

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
// Package client provides an HTTP client with retry logic for registry APIs.
2+
package client
3+
4+
import (
5+
"context"
6+
"encoding/json"
7+
"io"
8+
"math"
9+
"math/rand"
10+
"net/http"
11+
"strconv"
12+
"time"
13+
)
14+
15+
// RateLimiter controls request pacing.
//
// When a Client has a non-nil RateLimiter, GetBody calls Wait before every
// request attempt (including retries). A non-nil error from Wait aborts
// GetBody and is returned to the caller unchanged.
16+
type RateLimiter interface {
17+
Wait(ctx context.Context) error
18+
}
19+
20+
// Client is an HTTP client with retry logic for registry APIs.
//
// Fields:
//   - HTTPClient executes every GET and HEAD request issued by this client.
//   - UserAgent is sent as the User-Agent header on every request.
//   - MaxRetries is the number of retries GetBody performs after the initial
//     attempt, so a request is tried at most MaxRetries+1 times.
//   - BaseDelay is the wait before the first retry in GetBody; it doubles for
//     each subsequent retry and a random extra of up to 10% is added.
//   - RateLimiter is optional; when non-nil, GetBody calls its Wait method
//     before each attempt.
21+
type Client struct {
22+
HTTPClient *http.Client
23+
UserAgent string
24+
MaxRetries int
25+
BaseDelay time.Duration
26+
RateLimiter RateLimiter
27+
}
28+
29+
// DefaultClient returns a client with sensible defaults.
30+
func DefaultClient() *Client {
31+
return &Client{
32+
HTTPClient: &http.Client{
33+
Timeout: 30 * time.Second,
34+
},
35+
UserAgent: "registries",
36+
MaxRetries: 5,
37+
BaseDelay: 50 * time.Millisecond,
38+
}
39+
}
40+
41+
// GetJSON fetches a URL and decodes the JSON response into v.
42+
func (c *Client) GetJSON(ctx context.Context, url string, v any) error {
43+
body, err := c.GetBody(ctx, url)
44+
if err != nil {
45+
return err
46+
}
47+
return json.Unmarshal(body, v)
48+
}
49+
50+
// GetBody fetches a URL and returns the response body.
51+
func (c *Client) GetBody(ctx context.Context, url string) ([]byte, error) {
52+
var lastErr error
53+
54+
for attempt := 0; attempt <= c.MaxRetries; attempt++ {
55+
if attempt > 0 {
56+
delay := c.BaseDelay * time.Duration(math.Pow(2, float64(attempt-1)))
57+
jitter := time.Duration(float64(delay) * (rand.Float64() * 0.1))
58+
delay += jitter
59+
60+
select {
61+
case <-ctx.Done():
62+
return nil, ctx.Err()
63+
case <-time.After(delay):
64+
}
65+
}
66+
67+
if c.RateLimiter != nil {
68+
if err := c.RateLimiter.Wait(ctx); err != nil {
69+
return nil, err
70+
}
71+
}
72+
73+
body, err := c.doRequest(ctx, url)
74+
if err == nil {
75+
return body, nil
76+
}
77+
78+
lastErr = err
79+
80+
var httpErr *HTTPError
81+
if ok := isHTTPError(err, &httpErr); ok {
82+
if httpErr.StatusCode == 404 {
83+
return nil, err
84+
}
85+
if httpErr.StatusCode == 429 || httpErr.StatusCode >= 500 {
86+
continue
87+
}
88+
return nil, err
89+
}
90+
}
91+
92+
return nil, lastErr
93+
}
94+
95+
func (c *Client) doRequest(ctx context.Context, url string) ([]byte, error) {
96+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
97+
if err != nil {
98+
return nil, err
99+
}
100+
101+
req.Header.Set("User-Agent", c.UserAgent)
102+
req.Header.Set("Accept", "application/json")
103+
104+
resp, err := c.HTTPClient.Do(req)
105+
if err != nil {
106+
return nil, err
107+
}
108+
defer func() { _ = resp.Body.Close() }()
109+
110+
body, err := io.ReadAll(resp.Body)
111+
if err != nil {
112+
return nil, err
113+
}
114+
115+
if resp.StatusCode >= 400 {
116+
httpErr := &HTTPError{
117+
StatusCode: resp.StatusCode,
118+
URL: url,
119+
Body: string(body),
120+
}
121+
if resp.StatusCode == 429 {
122+
if retryAfter := resp.Header.Get("Retry-After"); retryAfter != "" {
123+
if seconds, err := strconv.Atoi(retryAfter); err == nil {
124+
return nil, &RateLimitError{RetryAfter: seconds}
125+
}
126+
}
127+
}
128+
return nil, httpErr
129+
}
130+
131+
return body, nil
132+
}
133+
134+
func isHTTPError(err error, target **HTTPError) bool {
135+
if httpErr, ok := err.(*HTTPError); ok {
136+
*target = httpErr
137+
return true
138+
}
139+
return false
140+
}
141+
142+
// GetText fetches a URL and returns the response body as a string.
143+
func (c *Client) GetText(ctx context.Context, url string) (string, error) {
144+
body, err := c.GetBody(ctx, url)
145+
if err != nil {
146+
return "", err
147+
}
148+
return string(body), nil
149+
}
150+
151+
// Head sends a HEAD request and returns the status code.
152+
func (c *Client) Head(ctx context.Context, url string) (int, error) {
153+
req, err := http.NewRequestWithContext(ctx, http.MethodHead, url, nil)
154+
if err != nil {
155+
return 0, err
156+
}
157+
158+
req.Header.Set("User-Agent", c.UserAgent)
159+
160+
resp, err := c.HTTPClient.Do(req)
161+
if err != nil {
162+
return 0, err
163+
}
164+
_ = resp.Body.Close()
165+
166+
return resp.StatusCode, nil
167+
}
168+
169+
// WithRateLimiter returns a copy of the client with the given rate limiter.
170+
func (c *Client) WithRateLimiter(rl RateLimiter) *Client {
171+
copy := *c
172+
copy.RateLimiter = rl
173+
return &copy
174+
}
175+
176+
// WithUserAgent returns a copy of the client with the given user agent.
177+
func (c *Client) WithUserAgent(ua string) *Client {
178+
copy := *c
179+
copy.UserAgent = ua
180+
return &copy
181+
}
182+
183+
// Option configures a Client.
//
// NewClient applies options in the order given, to a client initialised by
// DefaultClient.
184+
type Option func(*Client)
185+
186+
// WithTimeout sets the HTTP client timeout.
187+
func WithTimeout(d time.Duration) Option {
188+
return func(c *Client) {
189+
c.HTTPClient.Timeout = d
190+
}
191+
}
192+
193+
// WithMaxRetries sets the maximum number of retries.
194+
func WithMaxRetries(n int) Option {
195+
return func(c *Client) {
196+
c.MaxRetries = n
197+
}
198+
}
199+
200+
// NewClient creates a new client with the given options.
201+
func NewClient(opts ...Option) *Client {
202+
c := DefaultClient()
203+
for _, opt := range opts {
204+
opt(c)
205+
}
206+
return c
207+
}

0 commit comments

Comments
 (0)