diff --git a/acceptance/cmd/sandbox/list/region-unavailable/out.test.toml b/acceptance/cmd/sandbox/list/region-unavailable/out.test.toml new file mode 100644 index 0000000000..d6187dcb04 --- /dev/null +++ b/acceptance/cmd/sandbox/list/region-unavailable/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = [] diff --git a/acceptance/cmd/sandbox/list/region-unavailable/output.txt b/acceptance/cmd/sandbox/list/region-unavailable/output.txt new file mode 100644 index 0000000000..564ddd8a4f --- /dev/null +++ b/acceptance/cmd/sandbox/list/region-unavailable/output.txt @@ -0,0 +1,3 @@ +Error: failed to list sandboxes: the Databricks Sandbox feature is not available in your region, or the service is temporarily unavailable + +Exit code: 1 diff --git a/acceptance/cmd/sandbox/list/region-unavailable/script b/acceptance/cmd/sandbox/list/region-unavailable/script new file mode 100644 index 0000000000..62cc0f91e4 --- /dev/null +++ b/acceptance/cmd/sandbox/list/region-unavailable/script @@ -0,0 +1 @@ +errcode $CLI sandbox list diff --git a/acceptance/cmd/sandbox/list/region-unavailable/test.toml b/acceptance/cmd/sandbox/list/region-unavailable/test.toml new file mode 100644 index 0000000000..fc08c98d7c --- /dev/null +++ b/acceptance/cmd/sandbox/list/region-unavailable/test.toml @@ -0,0 +1,6 @@ +# A persistent 503 is retried max503Attempts times, then reported as a +# user-facing unavailability error. +[[Server]] +Pattern = "GET /api/2.0/lakebox/sandboxes" +Response.StatusCode = 503 +Response.Body = 'no healthy upstream' diff --git a/cmd/sandbox/api.go b/cmd/sandbox/api.go index 1fc05957e3..74cc444a58 100644 --- a/cmd/sandbox/api.go +++ b/cmd/sandbox/api.go @@ -2,6 +2,7 @@ package sandbox import ( "context" + "errors" "fmt" "net/http" "net/url" @@ -10,7 +11,10 @@ import ( "github.com/databricks/cli/libs/auth" "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/client" + "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/httpclient" ) // sandboxPath returns the URL path for a single sandbox resource. The ID is @@ -36,6 +40,38 @@ const ( sandboxKeysAPIPath = sandboxAPIRoot + "/ssh-keys" ) +// max503Attempts caps attempts when the server keeps answering 503; the +// SDK's default budget would otherwise retry for up to 5 minutes. +const max503Attempts = 3 + +type attempt503CounterKey struct{} + +// arm503Budget attaches the request's 503 attempt counter; retries of one +// request run sequentially, so a plain *int suffices. +func arm503Budget(ctx context.Context) context.Context { + return context.WithValue(ctx, attempt503CounterKey{}, new(int)) +} + +// allow503Retry consumes one unit of the request's 503 budget. Unarmed +// contexts get no retries. +func allow503Retry(ctx context.Context) bool { + n, ok := ctx.Value(attempt503CounterKey{}).(*int) + if !ok { + return false + } + *n++ + return *n < max503Attempts +} + +// translateError replaces a 503 with a user-facing message; the gateway +// body adds nothing, so it is dropped rather than wrapped. +func translateError(err error) error { + if apiErr, ok := errors.AsType[*apierr.APIError](err); ok && apiErr.StatusCode == http.StatusServiceUnavailable { + return errors.New("the Databricks Sandbox feature is not available in your region, or the service is temporarily unavailable") + } + return err +} + // orgIDHeader scopes the credential to a workspace on multi-workspace // gateways. Without it, requests fail with "Credential was not sent or was // of an unsupported type for this API." @@ -176,7 +212,20 @@ type registerKeyRequest struct { // newSandboxAPI returns a sandboxAPI bound to the workspace client's config. func newSandboxAPI(w *databricks.WorkspaceClient) (*sandboxAPI, error) { - c, err := client.New(w.Config) + clientCfg, err := config.HTTPClientConfigFromConfig(w.Config) + if err != nil { + return nil, fmt.Errorf("failed to create sandbox API client: %w", err) + } + defaultRetriable := clientCfg.ErrorRetriable + // Cap 503 retries by count, not deadline, so the final 503 surfaces as + // the APIError rather than a racy context.DeadlineExceeded. + clientCfg.ErrorRetriable = func(ctx context.Context, err error) bool { + if apiErr, ok := errors.AsType[*apierr.APIError](err); ok && apiErr.StatusCode == http.StatusServiceUnavailable { + return allow503Retry(ctx) + } + return defaultRetriable(ctx, err) + } + c, err := client.NewWithClient(w.Config, httpclient.NewApiClient(clientCfg)) if err != nil { return nil, fmt.Errorf("failed to create sandbox API client: %w", err) } @@ -195,12 +244,18 @@ func (a *sandboxAPI) headers() map[string]string { return map[string]string{orgIDHeader: wsID} } +// do issues one sandbox API request: arms the 503 retry budget, attaches +// the workspace routing headers, and translates terminal errors. +func (a *sandboxAPI) do(ctx context.Context, method, path string, request, response any) error { + return translateError(a.c.Do(arm503Budget(ctx), method, path, a.headers(), nil, request, response)) +} + // create calls POST /api/2.0/lakebox/sandboxes. An empty `name` is omitted // so the server treats it as "unset" rather than "explicit empty string". func (a *sandboxAPI) create(ctx context.Context, name string) (*createResponse, error) { body := createRequest{Sandbox: sandboxCreateBody{Name: name}} var resp createResponse - err := a.c.Do(ctx, http.MethodPost, sandboxAPIPath, a.headers(), nil, body, &resp) + err := a.do(ctx, http.MethodPost, sandboxAPIPath, body, &resp) if err != nil { return nil, err } @@ -239,7 +294,7 @@ func (a *sandboxAPI) listPage(ctx context.Context, pageToken string) (*listRespo query["page_token"] = pageToken } var resp listResponse - err := a.c.Do(ctx, http.MethodGet, sandboxAPIPath, a.headers(), nil, query, &resp) + err := a.do(ctx, http.MethodGet, sandboxAPIPath, query, &resp) if err != nil { return nil, err } @@ -249,7 +304,7 @@ func (a *sandboxAPI) listPage(ctx context.Context, pageToken string) (*listRespo // get calls GET /api/2.0/lakebox/sandboxes/{id}. func (a *sandboxAPI) get(ctx context.Context, id string) (*sandboxEntry, error) { var resp sandboxEntry - err := a.c.Do(ctx, http.MethodGet, sandboxPath(id), a.headers(), nil, nil, &resp) + err := a.do(ctx, http.MethodGet, sandboxPath(id), nil, &resp) if err != nil { return nil, err } @@ -273,7 +328,7 @@ func (a *sandboxAPI) update(ctx context.Context, id string, name *string, idleTi NoAutostop: noAutostop, } var resp sandboxEntry - err := a.c.Do(ctx, http.MethodPatch, sandboxPath(id), a.headers(), nil, body, &resp) + err := a.do(ctx, http.MethodPatch, sandboxPath(id), body, &resp) if err != nil { return nil, err } @@ -282,7 +337,7 @@ func (a *sandboxAPI) update(ctx context.Context, id string, name *string, idleTi // delete calls DELETE /api/2.0/lakebox/sandboxes/{id}. func (a *sandboxAPI) delete(ctx context.Context, id string) error { - return a.c.Do(ctx, http.MethodDelete, sandboxPath(id), a.headers(), nil, nil, nil) + return a.do(ctx, http.MethodDelete, sandboxPath(id), nil, nil) } // stop calls POST /api/2.0/lakebox/sandboxes/{id}/stop and returns the @@ -290,7 +345,7 @@ func (a *sandboxAPI) delete(ctx context.Context, id string) error { func (a *sandboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) { body := map[string]string{"sandbox_id": id} var resp sandboxEntry - err := a.c.Do(ctx, http.MethodPost, sandboxPath(id)+"/stop", a.headers(), nil, body, &resp) + err := a.do(ctx, http.MethodPost, sandboxPath(id)+"/stop", body, &resp) if err != nil { return nil, err } @@ -302,7 +357,7 @@ func (a *sandboxAPI) stop(ctx context.Context, id string) (*sandboxEntry, error) func (a *sandboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error) { body := map[string]string{"sandbox_id": id} var resp sandboxEntry - err := a.c.Do(ctx, http.MethodPost, sandboxPath(id)+"/start", a.headers(), nil, body, &resp) + err := a.do(ctx, http.MethodPost, sandboxPath(id)+"/start", body, &resp) if err != nil { return nil, err } @@ -316,7 +371,7 @@ func (a *sandboxAPI) start(ctx context.Context, id string) (*sandboxEntry, error // `create` call. func (a *sandboxAPI) registerKey(ctx context.Context, publicKey, name string) (*sshKeyEntry, error) { var resp sshKeyEntry - err := a.c.Do(ctx, http.MethodPost, sandboxKeysAPIPath, a.headers(), nil, registerKeyRequest{PublicKey: publicKey, Name: name}, &resp) + err := a.do(ctx, http.MethodPost, sandboxKeysAPIPath, registerKeyRequest{PublicKey: publicKey, Name: name}, &resp) if err != nil { return nil, err } @@ -345,7 +400,7 @@ type listKeysResponse struct { // listKeys calls GET /api/2.0/lakebox/ssh-keys. func (a *sandboxAPI) listKeys(ctx context.Context) ([]sshKeyEntry, error) { var resp listKeysResponse - err := a.c.Do(ctx, http.MethodGet, sandboxKeysAPIPath, a.headers(), nil, nil, &resp) + err := a.do(ctx, http.MethodGet, sandboxKeysAPIPath, nil, &resp) if err != nil { return nil, err } @@ -354,5 +409,5 @@ func (a *sandboxAPI) listKeys(ctx context.Context) ([]sshKeyEntry, error) { // deleteKey calls DELETE /api/2.0/lakebox/ssh-keys/{key_hash}. func (a *sandboxAPI) deleteKey(ctx context.Context, keyHash string) error { - return a.c.Do(ctx, http.MethodDelete, sandboxKeysAPIPath+"/"+url.PathEscape(keyHash), a.headers(), nil, nil, nil) + return a.do(ctx, http.MethodDelete, sandboxKeysAPIPath+"/"+url.PathEscape(keyHash), nil, nil) } diff --git a/cmd/sandbox/api_test.go b/cmd/sandbox/api_test.go index 97025c9c7c..e9bf139c0a 100644 --- a/cmd/sandbox/api_test.go +++ b/cmd/sandbox/api_test.go @@ -1,9 +1,11 @@ package sandbox import ( + "net/http" "strings" "testing" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -21,6 +23,33 @@ func TestValidateNameRejectsOversize(t *testing.T) { assert.Contains(t, err.Error(), "256") } +func TestTranslateErrorRewrites503(t *testing.T) { + orig := &apierr.APIError{StatusCode: http.StatusServiceUnavailable, Message: "Service Unavailable"} + err := translateError(orig) + require.Error(t, err) + assert.Equal(t, "the Databricks Sandbox feature is not available in your region, or the service is temporarily unavailable", err.Error()) +} + +func TestAllow503RetryConsumesBudget(t *testing.T) { + ctx := arm503Budget(t.Context()) + // max503Attempts-1 retries are allowed, then the budget is exhausted. + for range max503Attempts - 1 { + assert.True(t, allow503Retry(ctx)) + } + assert.False(t, allow503Retry(ctx)) +} + +func TestAllow503RetryUnarmedContext(t *testing.T) { + assert.False(t, allow503Retry(t.Context())) +} + +func TestTranslateErrorPassesThroughOthers(t *testing.T) { + require.NoError(t, translateError(nil)) + + notFound := &apierr.APIError{StatusCode: http.StatusNotFound, Message: "Sandbox not found"} + assert.Equal(t, error(notFound), translateError(notFound)) +} + func TestValidateNameCountsBytesNotRunes(t *testing.T) { // 64 panda emoji = 64 × 4 bytes = 256 bytes — at the limit, OK. require.NoError(t, validateName(strings.Repeat("🐼", 64)))