From 48b68e3c5fc337602c4c02ecf9e45bd2a65e65b6 Mon Sep 17 00:00:00 2001 From: Jon Langevin Date: Wed, 20 May 2026 19:56:07 -0400 Subject: [PATCH] feat: support product capture workloads --- go.mod | 2 +- go.sum | 8 ++--- internal/cli.go | 44 +++++++++++++++++++++++++- internal/cli_test.go | 46 +++++++++++++++++++++++++++ internal/steps_test.go | 70 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index 6d2f5bb..cb29f8a 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ toolchain go1.26.2 require ( github.com/GoCodeAlone/workflow v0.27.0 - github.com/GoCodeAlone/workflow-compute v0.0.0-20260520232556-8e112579a8c0 + github.com/GoCodeAlone/workflow-compute v0.0.0-20260521000401-ae5c20e7b123 ) require ( diff --git a/go.sum b/go.sum index 06ac3d4..4e66224 100644 --- a/go.sum +++ b/go.sum @@ -48,10 +48,8 @@ github.com/GoCodeAlone/modular/modules/eventbus/v2 v2.8.0 h1:buYs0TGNbAZgtTq1Qb+ github.com/GoCodeAlone/modular/modules/eventbus/v2 v2.8.0/go.mod h1:329flAKmwrPq2JEwu9iltWv6A83H/Di82Xze+kvdKDw= github.com/GoCodeAlone/workflow v0.27.0 h1:oufPjwWTuwbZw5PckBEDrIah+w7JJcj51nKQzLe5io0= github.com/GoCodeAlone/workflow v0.27.0/go.mod h1:Ue+5YDScTZgtA36q6r/kDaIRxGJFkyxXbeyJVNVJ0Cc= -github.com/GoCodeAlone/workflow-compute v0.0.0-20260509213854-e62dca3c1662 h1:nC/EvC0w5KQpCVbzxN+d2iBEbmAunp/0qchf3cUENto= -github.com/GoCodeAlone/workflow-compute v0.0.0-20260509213854-e62dca3c1662/go.mod h1:Loml3Kueb3XI4Nh+CIPR55dUlLXkd/bnfsE3rf/WVlw= -github.com/GoCodeAlone/workflow-compute v0.0.0-20260520232556-8e112579a8c0 h1:gQC+CIChZZFvUwQ4omKHz+fZ+GscRhZcUyoV641U9C8= -github.com/GoCodeAlone/workflow-compute v0.0.0-20260520232556-8e112579a8c0/go.mod h1:m1GFY/28DcdOp2ok+tTlvqnJqNYhWk5cwJs/8zUFMh4= +github.com/GoCodeAlone/workflow-compute v0.0.0-20260521000401-ae5c20e7b123 h1:zZpxqNcU2YRvkZoiQYDyVo/Y7cmDVk6E2Tu2Y0jx+T4= +github.com/GoCodeAlone/workflow-compute v0.0.0-20260521000401-ae5c20e7b123/go.mod h1:m1GFY/28DcdOp2ok+tTlvqnJqNYhWk5cwJs/8zUFMh4= github.com/GoCodeAlone/yaegi v0.17.2 h1:WK6Y6e0t1a6U7r+S2dN3CGWW1PizYD3zO0zneToZPxM= github.com/GoCodeAlone/yaegi v0.17.2/go.mod h1:z5Pr6Wse6QJcQvpgxTxzMAevFarH0N37TG88Y9dprx0= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.32.0 h1:rIkQfkCOVKc1OiRCNcSDD8ml5RJlZbH/Xsq7lbpynwc= @@ -285,8 +283,6 @@ github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsI github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= -github.com/jackc/pgx/v5 v5.9.1 h1:uwrxJXBnx76nyISkhr33kQLlUqjv7et7b9FjCen/tdc= -github.com/jackc/pgx/v5 v5.9.1/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4= github.com/jackc/pgx/v5 v5.9.2 h1:3ZhOzMWnR4yJ+RW1XImIPsD1aNSz4T4fyP7zlQb56hw= github.com/jackc/pgx/v5 v5.9.2/go.mod h1:mal1tBGAFfLHvZzaYh77YS/eC6IX9OWbRV1QIIM0Jn4= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= diff --git a/internal/cli.go b/internal/cli.go index 6a0e262..a7468df 100644 --- a/internal/cli.go +++ b/internal/cli.go @@ -197,13 +197,15 @@ func (c *computeCLI) runRun(ctx context.Context, args []string) error { func (c *computeCLI) runSubmit(ctx context.Context, args []string) error { if len(args) == 0 { - return errors.New("usage: wfctl compute submit ") + return errors.New("usage: wfctl compute submit ") } switch args[0] { case "command": return c.runSubmitCommand(ctx, args[1:]) case "container-build": return c.runSubmitContainerBuild(ctx, args[1:]) + case "product-capture": + return c.runSubmitProductCapture(ctx, args[1:]) default: return fmt.Errorf("unknown wfctl compute submit workload %q", args[0]) } @@ -281,6 +283,46 @@ func (c *computeCLI) runSubmitContainerBuild(ctx context.Context, args []string) return c.writeTaskReceipt(ctx, client, task) } +func (c *computeCLI) runSubmitProductCapture(ctx context.Context, args []string) error { + fs := c.newFlagSet("compute submit product-capture") + common := addCLICommonFlags(fs) + taskFlags := addCLITaskFlags(fs) + productURL := fs.String("url", "", "supported product URL") + allowedHosts := csvFlag{} + captureMode := fs.String("capture-mode", string(protocol.ProductCaptureModeBrowser), "capture mode") + captureTimeout := fs.Int("capture-timeout", 45, "capture timeout seconds") + maxHTMLBytes := fs.Int64("max-html-bytes", protocol.MaxProductCaptureHTMLBytes, "maximum captured HTML bytes") + maxImageCount := fs.Int("max-image-count", 8, "maximum product images to return") + metadataOnly := fs.Bool("metadata-only", false, "request metadata-only extraction when supported") + fs.Var(&allowedHosts, "allowed-host", "allowed URL host; repeatable or comma-separated") + if err := fs.Parse(args); err != nil { + return err + } + if err := taskFlags.validate(); err != nil { + return err + } + workload := protocol.WorkloadSpec{ + Kind: protocol.WorkloadProductCapture, + ProductCapture: &protocol.ProductCaptureWorkload{ + URL: *productURL, + AllowedHosts: allowedHosts.values(), + CaptureMode: protocol.ProductCaptureMode(*captureMode), + TimeoutSeconds: *captureTimeout, + MaxHTMLBytes: *maxHTMLBytes, + MaxImageCount: *maxImageCount, + MetadataOnly: *metadataOnly, + }, + } + if err := workload.Validate(); err != nil { + return err + } + client, err := common.client() + if err != nil { + return err + } + return c.writeTaskReceipt(ctx, client, taskFlags.task(workload)) +} + func (c *computeCLI) writeTaskReceipt(ctx context.Context, client *computeClient, task protocol.Task) error { submitted, err := client.submitTask(ctx, task) if err != nil { diff --git a/internal/cli_test.go b/internal/cli_test.go index 739f09f..5d5f727 100644 --- a/internal/cli_test.go +++ b/internal/cli_test.go @@ -149,6 +149,52 @@ func TestT8_CLISubmitContainerBuild(t *testing.T) { } } +func TestV739_CLISubmitProductCapture(t *testing.T) { + var got protocol.Task + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/v1/tasks" { + t.Fatalf("request: %s %s", r.Method, r.URL.Path) + } + if err := json.NewDecoder(r.Body).Decode(&got); err != nil { + t.Fatalf("decode task: %v", err) + } + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]any{"task": got}) + })) + defer srv.Close() + + var stdout, stderr bytes.Buffer + code := newCLI(&stdout, &stderr).RunCLI([]string{ + "compute", "submit", "product-capture", + "--server", srv.URL, + "--token", "token", + "--id", "capture-1", + "--org", "org-1", + "--pool", "pool-1", + "--url", "https://www.amazon.com/Microsoft-Xbox-Gaming-Console-video-game/dp/B08H75RTZ8", + "--allowed-host", "www.amazon.com", + "--capture-mode", "browser", + "--capture-timeout", "45", + "--max-html-bytes", "10485760", + "--max-image-count", "6", + }) + + if code != 0 { + t.Fatalf("RunCLI code=%d stderr=%s", code, stderr.String()) + } + if got.Workload.Kind != protocol.WorkloadProductCapture || got.Workload.ProductCapture == nil { + t.Fatalf("task: got %+v", got) + } + if got.Workload.ProductCapture.AllowedHosts[0] != "www.amazon.com" { + t.Fatalf("allowed hosts: %+v", got.Workload.ProductCapture) + } + for _, forbidden := range [][]byte{[]byte("token"), []byte("signature"), []byte("workload"), []byte("amazon.com")} { + if bytes.Contains(stdout.Bytes(), forbidden) { + t.Fatalf("stdout leaked %q: %s", forbidden, stdout.String()) + } + } +} + func TestT8_CLISubmitValidatesBeforeAPICall(t *testing.T) { var calls int srv := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) { diff --git a/internal/steps_test.go b/internal/steps_test.go index 5f62413..bcacb49 100644 --- a/internal/steps_test.go +++ b/internal/steps_test.go @@ -75,6 +75,52 @@ func TestDispatchStepRejectsUnknownNestedWorkloadConfig(t *testing.T) { } } +func TestDispatchStepAcceptsProductCaptureWorkload(t *testing.T) { + var got protocol.Task + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost || r.URL.Path != "/v1/tasks" { + t.Fatalf("request: %s %s", r.Method, r.URL.Path) + } + if err := json.NewDecoder(r.Body).Decode(&got); err != nil { + t.Fatalf("decode task: %v", err) + } + w.WriteHeader(http.StatusCreated) + _ = json.NewEncoder(w).Encode(map[string]any{"task": got}) + })) + defer srv.Close() + + step, err := newDispatchStep("dispatch", productCaptureConfigMap(srv.URL)) + if err != nil { + t.Fatalf("newDispatchStep: %v", err) + } + result, err := step.Execute(context.Background(), nil, nil, nil, nil, runtimeSecrets()) + if err != nil { + t.Fatalf("Execute: %v", err) + } + if result.StopPipeline { + t.Fatalf("unexpected stop: %+v", result.Output) + } + if got.Workload.Kind != protocol.WorkloadProductCapture || got.Workload.ProductCapture == nil { + t.Fatalf("workload: got %+v", got.Workload) + } + if got.Workload.ProductCapture.URL != "https://www.amazon.com/Microsoft-Xbox-Gaming-Console-video-game/dp/B08H75RTZ8" { + t.Fatalf("product url: %+v", got.Workload.ProductCapture) + } + if got.Workload.ProductCapture.CaptureMode != protocol.ProductCaptureModeBrowser { + t.Fatalf("capture mode: %+v", got.Workload.ProductCapture) + } +} + +func TestDispatchStepRejectsUnknownNestedProductCaptureConfig(t *testing.T) { + cfg := productCaptureConfigMap("https://compute.example.test") + workload := cfg["workload"].(map[string]any) + productCapture := workload["product_capture"].(map[string]any) + productCapture["extra"] = true + if _, err := newDispatchStep("dispatch", cfg); err == nil { + t.Fatal("expected strict nested product_capture unknown-field error") + } +} + func TestWaitStepReadsTaskStatus(t *testing.T) { var taskCalls int srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -725,6 +771,30 @@ func taskConfigMap(id string) map[string]any { } } +func productCaptureConfigMap(serverURL string) map[string]any { + cfg := map[string]any{ + "id": "capture-1", + "org_id": "org-1", + "pool_id": "pool-1", + "policy_id": "policy-1", + "timeout_seconds": 60, + "server_url": serverURL, + "auth_token_ref": "secret:compute-token", + "workload": map[string]any{ + "kind": "product-capture", + "product_capture": map[string]any{ + "url": "https://www.amazon.com/Microsoft-Xbox-Gaming-Console-video-game/dp/B08H75RTZ8", + "allowed_hosts": []any{"www.amazon.com"}, + "capture_mode": "browser", + "timeout_seconds": 45, + "max_html_bytes": 10485760, + "max_image_count": 6, + }, + }, + } + return cfg +} + func runtimeSecrets() map[string]any { return map[string]any{ "secrets": map[string]any{