From 541f1f83ac537554773f2fddb5aa1c497b106b35 Mon Sep 17 00:00:00 2001 From: Yuval Kashtan Date: Thu, 21 May 2026 19:33:41 +0000 Subject: [PATCH 1/7] feat: harden Cloud Run ingress with per-service GCLB and Cloud Armor WAF Restrict Cloud Run ingress from 'all' to 'internal-and-cloud-load-balancing' for both services (service.yaml, marketplace-handler.yaml). Add optional per-service Google Cloud Load Balancers with independent Cloud Armor WAF policies so traffic must pass through the GCLB security stack. Each service (agent, handler) gets its own GCLB: static IP, SSL cert, NEG, backend, URL map, HTTPS proxy, forwarding rule, and Cloud Armor policy. Key behaviors: - When LB is enabled: ingress stays restricted, traffic goes through GCLB - When LB is not enabled: deploy.sh overrides ingress to 'all' so external traffic is not blocked by the YAML default - SSL certificate existence is validated before HTTPS proxy creation - 'lb' deploy mode validates services exist before creating LB resources - cleanup.sh always attempts LB resource cleanup defensively, regardless of ENABLE_LB_* flags, preventing orphaned resources - Post-deploy env var updates warn on failure instead of silently succeeding WAF rules (OWASP ModSecurity CRS v3.3): sqli, xss, lfi, rfi, rce, scanner detection, protocol attack, session fixation. Config: ENABLE_LB_AGENT, AGENT_DOMAIN_NAME, ENABLE_LB_HANDLER, HANDLER_DOMAIN_NAME, ENABLE_CLOUD_ARMOR_AGENT, ENABLE_CLOUD_ARMOR_HANDLER. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- deploy/cloudrun/cleanup.sh | 39 ++- deploy/cloudrun/deploy.sh | 420 +++++++++++++++++++---- deploy/cloudrun/marketplace-handler.yaml | 2 +- deploy/cloudrun/service.yaml | 2 +- deploy/cloudrun/setup.sh | 85 +++++ 5 files changed, 479 insertions(+), 69 deletions(-) diff --git a/deploy/cloudrun/cleanup.sh b/deploy/cloudrun/cleanup.sh index 58e08628..5f3408cf 100755 --- a/deploy/cloudrun/cleanup.sh +++ b/deploy/cloudrun/cleanup.sh @@ -53,6 +53,9 @@ PUBSUB_INVOKER_SA="${PUBSUB_INVOKER_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" PUBSUB_TOPIC="${PUBSUB_TOPIC:-marketplace-entitlements}" PUBSUB_SUBSCRIPTION="${PUBSUB_SUBSCRIPTION:-${PUBSUB_TOPIC}-sub}" +# Load balancer resource name prefix (used by cleanup_service_lb) +LB_NAME="${LB_NAME:-lightspeed-lb}" + # Parse arguments FORCE=false @@ -80,6 +83,8 @@ fi log_warn "This will delete the following resources from project: $PROJECT_ID" echo "" echo " - Cloud Run services: $SERVICE_NAME, $HANDLER_SERVICE_NAME" +echo " - Load balancer resources (if any): forwarding rules, HTTPS proxies," +echo " URL maps, SSL certs, backend services, NEGs, static IPs, Cloud Armor policies" echo " - Pub/Sub topic: $PUBSUB_TOPIC" echo " - Pub/Sub subscription: $PUBSUB_SUBSCRIPTION" echo " - Secrets: redhat-sso-client-id, redhat-sso-client-secret, database-url," @@ -130,7 +135,34 @@ else fi # ============================================================================= -# Step 2: Delete Pub/Sub Resources +# Step 2: Delete Load Balancer Resources (per-service) +# ============================================================================= +# Delete all LB resources for a single service (reverse dependency order). +# Uses try-delete: nonexistent resources are silently skipped. +cleanup_service_lb() { + local service_label="$1" + local p="${LB_NAME}-${service_label}" + + log_info "Cleaning up ${service_label} LB resources..." 
+ + gcloud compute forwarding-rules delete "${p}-forwarding-rule" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true + gcloud compute target-https-proxies delete "${p}-https-proxy" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true + gcloud compute url-maps delete "${p}-url-map" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true + gcloud compute ssl-certificates delete "${p}-cert" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true + # Detach Cloud Armor before deleting backend (may have been enabled without the flag) + gcloud compute backend-services update "${p}-backend" --security-policy="" --global --project="$PROJECT_ID" 2>/dev/null || true + gcloud compute security-policies delete "${p}-security-policy" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true + gcloud compute backend-services delete "${p}-backend" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true + gcloud compute network-endpoint-groups delete "${p}-neg" --region="$REGION" --project="$PROJECT_ID" --quiet 2>/dev/null || true + gcloud compute addresses delete "${p}-ip" --global --project="$PROJECT_ID" --quiet 2>/dev/null || true +} + +log_info "Cleaning up load balancer resources (if any)..." +cleanup_service_lb "agent" +cleanup_service_lb "handler" + +# ============================================================================= +# Step 3: Delete Pub/Sub Resources # ============================================================================= log_info "Deleting Pub/Sub resources..." @@ -157,7 +189,7 @@ else fi # ============================================================================= -# Step 3: Delete Secrets +# Step 4: Delete Secrets # ============================================================================= log_info "Deleting secrets from Secret Manager..." 
@@ -184,7 +216,7 @@ for secret in "${secrets[@]}"; do done # ============================================================================= -# Step 4: Remove IAM Bindings and Delete Service Account +# Step 5: Remove IAM Bindings and Delete Service Account # ============================================================================= log_info "Removing service account IAM bindings..." @@ -265,6 +297,7 @@ log_info "==========================================" echo "" echo "The following resources have been removed:" echo " - Cloud Run services ($SERVICE_NAME, $HANDLER_SERVICE_NAME)" +echo " - Load balancer resources (if any existed)" echo " - Pub/Sub topic and subscription" echo " - Secret Manager secrets" echo " - Service accounts (runtime + Pub/Sub invoker) and IAM bindings" diff --git a/deploy/cloudrun/deploy.sh b/deploy/cloudrun/deploy.sh index 1bf72bd2..45239479 100755 --- a/deploy/cloudrun/deploy.sh +++ b/deploy/cloudrun/deploy.sh @@ -14,8 +14,9 @@ # ./deploy/cloudrun/deploy.sh [OPTIONS] # # Options: -# --service Which service to deploy: all, handler, agent +# --service Which service to deploy: all, handler, agent, lb # (default: all) +# "lb" sets up load balancers only (no service redeploy) # --image Container image for the agent # (default: gcr.io/$PROJECT_ID/lightspeed-agent:latest) # --handler-image Container image for the marketplace handler @@ -74,6 +75,15 @@ PUBSUB_INVOKER_SA="${PUBSUB_INVOKER_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" # Marketplace configuration ENABLE_MARKETPLACE="${ENABLE_MARKETPLACE:-true}" + +# Per-service load balancer configuration +ENABLE_LB_AGENT="${ENABLE_LB_AGENT:-false}" +ENABLE_LB_HANDLER="${ENABLE_LB_HANDLER:-false}" +ENABLE_CLOUD_ARMOR_AGENT="${ENABLE_CLOUD_ARMOR_AGENT:-false}" +ENABLE_CLOUD_ARMOR_HANDLER="${ENABLE_CLOUD_ARMOR_HANDLER:-false}" +AGENT_DOMAIN_NAME="${AGENT_DOMAIN_NAME:-}" +HANDLER_DOMAIN_NAME="${HANDLER_DOMAIN_NAME:-}" +LB_NAME="${LB_NAME:-lightspeed-lb}" 
SERVICE_CONTROL_SERVICE_NAME="${SERVICE_CONTROL_SERVICE_NAME:-}" PUBSUB_TOPIC="${PUBSUB_TOPIC:-marketplace-entitlements}" @@ -128,7 +138,7 @@ while [[ $# -gt 0 ]]; do ;; *) log_error "Unknown option: $1" - echo "Usage: $0 [--service all|handler|agent] [--image IMAGE] [--handler-image IMAGE] [--mcp-image IMAGE] [--allow-unauthenticated] [--build]" + echo "Usage: $0 [--service all|handler|agent|lb] [--image IMAGE] [--handler-image IMAGE] [--mcp-image IMAGE] [--allow-unauthenticated] [--build]" exit 1 ;; esac @@ -140,6 +150,28 @@ if [[ -z "$PROJECT_ID" ]]; then exit 1 fi +if [[ "$ENABLE_LB_AGENT" == "true" && -z "$AGENT_DOMAIN_NAME" ]]; then + log_error "AGENT_DOMAIN_NAME is required when ENABLE_LB_AGENT=true" + echo " export AGENT_DOMAIN_NAME=agent.example.com" + exit 1 +fi + +if [[ "$ENABLE_LB_HANDLER" == "true" && -z "$HANDLER_DOMAIN_NAME" ]]; then + log_error "HANDLER_DOMAIN_NAME is required when ENABLE_LB_HANDLER=true" + echo " export HANDLER_DOMAIN_NAME=dcr.example.com" + exit 1 +fi + +if [[ "$ENABLE_CLOUD_ARMOR_AGENT" == "true" && "$ENABLE_LB_AGENT" != "true" ]]; then + log_error "ENABLE_CLOUD_ARMOR_AGENT requires ENABLE_LB_AGENT=true" + exit 1 +fi + +if [[ "$ENABLE_CLOUD_ARMOR_HANDLER" == "true" && "$ENABLE_LB_HANDLER" != "true" ]]; then + log_error "ENABLE_CLOUD_ARMOR_HANDLER requires ENABLE_LB_HANDLER=true" + exit 1 +fi + # Set default images if not specified if [[ -z "$AGENT_IMAGE" ]]; then AGENT_IMAGE="gcr.io/${PROJECT_ID}/lightspeed-agent:${IMAGE_TAG}" @@ -337,6 +369,180 @@ configure_pubsub_push() { log_info " Auth SA: $PUBSUB_INVOKER_SA" } +# ============================================================================= +# Configure per-service Google Cloud Load Balancer +# ============================================================================= +# Creates an independent GCLB for a single Cloud Run service. 
+# Usage: setup_service_lb service_label cloud_run_service domain_name cloud_armor_enabled +# service_label: "agent" or "handler" (used in resource naming) +# cloud_run_service: Cloud Run service name to front with the LB +# domain_name: Domain for the Google-managed SSL certificate +# cloud_armor_enabled: "true" to create and attach a Cloud Armor WAF policy +setup_service_lb() { + local service_label="$1" + local cloud_run_service="$2" + local domain_name="$3" + local cloud_armor_enabled="$4" + + local neg_name="${LB_NAME}-${service_label}-neg" + local backend_name="${LB_NAME}-${service_label}-backend" + local policy_name="${LB_NAME}-${service_label}-security-policy" + local url_map_name="${LB_NAME}-${service_label}-url-map" + local cert_name="${LB_NAME}-${service_label}-cert" + local proxy_name="${LB_NAME}-${service_label}-https-proxy" + local rule_name="${LB_NAME}-${service_label}-forwarding-rule" + local ip_name="${LB_NAME}-${service_label}-ip" + + log_info "Setting up load balancer for ${service_label} (${cloud_run_service})..." + + # ------------------------------------------------------------------------- + # Create serverless NEG + # ------------------------------------------------------------------------- + if ! gcloud compute network-endpoint-groups describe "$neg_name" \ + --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + gcloud compute network-endpoint-groups create "$neg_name" \ + --region="$REGION" \ + --network-endpoint-type=serverless \ + --cloud-run-service="$cloud_run_service" \ + --project="$PROJECT_ID" + log_info "NEG '$neg_name' created" + else + log_info "NEG '$neg_name' already exists" + fi + + # ------------------------------------------------------------------------- + # Create backend service + # ------------------------------------------------------------------------- + if ! 
gcloud compute backend-services describe "$backend_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute backend-services create "$backend_name" \ + --global \ + --project="$PROJECT_ID" + gcloud compute backend-services add-backend "$backend_name" \ + --global \ + --network-endpoint-group="$neg_name" \ + --network-endpoint-group-region="$REGION" \ + --project="$PROJECT_ID" + log_info "Backend service '$backend_name' created" + else + log_info "Backend service '$backend_name' already exists" + fi + + # ------------------------------------------------------------------------- + # Create Cloud Armor security policy (if enabled) + # ------------------------------------------------------------------------- + if [[ "$cloud_armor_enabled" == "true" ]]; then + log_info "Configuring Cloud Armor security policy for ${service_label}..." + + if ! gcloud compute security-policies describe "$policy_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute security-policies create "$policy_name" \ + --global \ + --project="$PROJECT_ID" + log_info "Security policy '$policy_name' created" + else + log_info "Security policy '$policy_name' already exists" + fi + + # Add preconfigured WAF rules (OWASP ModSecurity CRS) + declare -A WAF_RULES=( + [1000]="sqli-v33-stable" + [1100]="xss-v33-stable" + [1200]="lfi-v33-stable" + [1300]="rfi-v33-stable" + [1400]="rce-v33-stable" + [1500]="scannerdetection-v33-stable" + [1600]="protocolattack-v33-stable" + [1700]="sessionfixation-v33-stable" + ) + + for priority in $(echo "${!WAF_RULES[@]}" | tr ' ' '\n' | sort -n); do + local waf_rule_name="${WAF_RULES[$priority]}" + if ! 
gcloud compute security-policies rules describe "$priority" \ + --security-policy="$policy_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute security-policies rules create "$priority" \ + --security-policy="$policy_name" \ + --expression="evaluatePreconfiguredExpr('${waf_rule_name}')" \ + --action=deny-403 \ + --global \ + --project="$PROJECT_ID" + log_info "WAF rule '${waf_rule_name}' added at priority $priority" + else + log_info "WAF rule at priority $priority already exists" + fi + done + + log_info "Attaching security policy to backend service..." + gcloud compute backend-services update "$backend_name" \ + --security-policy="$policy_name" \ + --global --project="$PROJECT_ID" + log_info "Cloud Armor security policy attached to '$backend_name'" + fi + + # ------------------------------------------------------------------------- + # Create URL map (simple default backend, no path routing needed) + # ------------------------------------------------------------------------- + if ! gcloud compute url-maps describe "$url_map_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute url-maps create "$url_map_name" \ + --default-service="$backend_name" \ + --global \ + --project="$PROJECT_ID" + log_info "URL map '$url_map_name' created" + else + log_info "URL map '$url_map_name' already exists" + fi + + # ------------------------------------------------------------------------- + # Create HTTPS proxy with managed SSL certificate + # ------------------------------------------------------------------------- + if ! gcloud compute ssl-certificates describe "$cert_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + log_error "SSL certificate '$cert_name' does not exist. Run setup.sh first." + return 1 + fi + + if ! 
gcloud compute target-https-proxies describe "$proxy_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute target-https-proxies create "$proxy_name" \ + --ssl-certificates="$cert_name" \ + --url-map="$url_map_name" \ + --global \ + --project="$PROJECT_ID" + log_info "HTTPS proxy '$proxy_name' created" + else + log_info "HTTPS proxy '$proxy_name' already exists" + fi + + # ------------------------------------------------------------------------- + # Create global forwarding rule + # ------------------------------------------------------------------------- + if ! gcloud compute forwarding-rules describe "$rule_name" \ + --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute forwarding-rules create "$rule_name" \ + --global \ + --target-https-proxy="$proxy_name" \ + --address="$ip_name" \ + --ports=443 \ + --project="$PROJECT_ID" + log_info "Forwarding rule '$rule_name' created" + else + log_info "Forwarding rule '$rule_name' already exists" + fi + + # ------------------------------------------------------------------------- + # Override Cloud Run ingress to internal-and-cloud-load-balancing + # ------------------------------------------------------------------------- + log_info "Updating $cloud_run_service ingress to internal-and-cloud-load-balancing..." 
+ gcloud run services update "$cloud_run_service" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --ingress=internal-and-cloud-load-balancing \ + --quiet + log_info "Cloud Run ingress updated for $cloud_run_service" +} + # ============================================================================= # Main deployment # ============================================================================= @@ -363,17 +569,71 @@ case "$DEPLOY_SERVICE" in deploy_handler configure_pubsub_push deploy_agent + if [[ "$ENABLE_LB_HANDLER" == "true" ]]; then + setup_service_lb "handler" "$HANDLER_SERVICE_NAME" "$HANDLER_DOMAIN_NAME" "$ENABLE_CLOUD_ARMOR_HANDLER" + else + log_info "No LB for handler — setting ingress to all..." + gcloud run services update "$HANDLER_SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --ingress=all --quiet + fi + if [[ "$ENABLE_LB_AGENT" == "true" ]]; then + setup_service_lb "agent" "$SERVICE_NAME" "$AGENT_DOMAIN_NAME" "$ENABLE_CLOUD_ARMOR_AGENT" + else + log_info "No LB for agent — setting ingress to all..." + gcloud run services update "$SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --ingress=all --quiet + fi ;; handler) deploy_handler configure_pubsub_push + if [[ "$ENABLE_LB_HANDLER" == "true" ]]; then + setup_service_lb "handler" "$HANDLER_SERVICE_NAME" "$HANDLER_DOMAIN_NAME" "$ENABLE_CLOUD_ARMOR_HANDLER" + else + log_info "No LB for handler — setting ingress to all..." + gcloud run services update "$HANDLER_SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --ingress=all --quiet + fi ;; agent) deploy_agent + if [[ "$ENABLE_LB_AGENT" == "true" ]]; then + setup_service_lb "agent" "$SERVICE_NAME" "$AGENT_DOMAIN_NAME" "$ENABLE_CLOUD_ARMOR_AGENT" + else + log_info "No LB for agent — setting ingress to all..." 
+ gcloud run services update "$SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --ingress=all --quiet + fi + ;; + lb) + log_info "Setting up load balancers only (no service redeploy)..." + if [[ "$ENABLE_LB_HANDLER" == "true" ]]; then + if ! gcloud run services describe "$HANDLER_SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + log_error "$HANDLER_SERVICE_NAME does not exist. Deploy it first before setting up its LB." + exit 1 + fi + setup_service_lb "handler" "$HANDLER_SERVICE_NAME" "$HANDLER_DOMAIN_NAME" "$ENABLE_CLOUD_ARMOR_HANDLER" + fi + if [[ "$ENABLE_LB_AGENT" == "true" ]]; then + if ! gcloud run services describe "$SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + log_error "$SERVICE_NAME does not exist. Deploy it first before setting up its LB." + exit 1 + fi + setup_service_lb "agent" "$SERVICE_NAME" "$AGENT_DOMAIN_NAME" "$ENABLE_CLOUD_ARMOR_AGENT" + fi + if [[ "$ENABLE_LB_AGENT" != "true" && "$ENABLE_LB_HANDLER" != "true" ]]; then + log_warn "No load balancers enabled. Set ENABLE_LB_AGENT=true and/or ENABLE_LB_HANDLER=true." + fi ;; *) log_error "Unknown service: $DEPLOY_SERVICE" - echo "Valid services: all, handler, agent" + echo "Valid services: all, handler, agent, lb" exit 1 ;; esac @@ -403,6 +663,40 @@ show_service_info() { fi } +# Update AGENT_PROVIDER_URL and MARKETPLACE_HANDLER_URL on the agent service +# so the AgentCard advertises the correct externally-reachable URLs. +# When per-service LBs are enabled, uses GCLB domains instead of Cloud Run URLs. 
+update_agentcard_urls() { + local service_url handler_url env_vars + service_url=$(gcloud run services describe "$SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") + handler_url=$(gcloud run services describe "$HANDLER_SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") + + [[ "$ENABLE_LB_AGENT" == "true" ]] && service_url="https://$AGENT_DOMAIN_NAME" + [[ "$ENABLE_LB_HANDLER" == "true" ]] && handler_url="https://$HANDLER_DOMAIN_NAME" + + [[ -z "$service_url" ]] && return + + env_vars="AGENT_PROVIDER_URL=$service_url" + if [[ -n "$handler_url" ]]; then + env_vars="$env_vars,MARKETPLACE_HANDLER_URL=$handler_url" + else + log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. MARKETPLACE_HANDLER_URL not set." + fi + + log_info "Updating agent env vars: $env_vars" + if gcloud run services update "$SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" \ + --update-env-vars="$env_vars" --quiet 2>/dev/null; then + log_info "Agent env vars updated successfully" + else + log_warn "Could not update agent env vars. Set AGENT_PROVIDER_URL and MARKETPLACE_HANDLER_URL manually." + fi +} + # Show info for deployed services case "$DEPLOY_SERVICE" in all) @@ -410,37 +704,7 @@ case "$DEPLOY_SERVICE" in show_service_info "$HANDLER_SERVICE_NAME" echo "" show_service_info "$SERVICE_NAME" - - # Update AGENT_PROVIDER_URL (agent base URL) and MARKETPLACE_HANDLER_URL - # on the agent service so the AgentCard advertises the correct URLs. - # Note: AGENT_PROVIDER_ORGANIZATION_URL (JWT audience for DCR) is set - # in service.yaml and does NOT change per deployment — it's the - # provider's website (e.g., https://www.redhat.com). 
- service_url=$(gcloud run services describe "$SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --format='value(status.url)' 2>/dev/null) - handler_url=$(gcloud run services describe "$HANDLER_SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --format='value(status.url)' 2>/dev/null || echo "") - - if [[ -n "$service_url" ]]; then - env_vars="AGENT_PROVIDER_URL=$service_url" - if [[ -n "$handler_url" ]]; then - env_vars="$env_vars,MARKETPLACE_HANDLER_URL=$handler_url" - else - log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. MARKETPLACE_HANDLER_URL not set." - log_warn "DCR endpoints in the AgentCard will fall back to AGENT_PROVIDER_URL." - fi - log_info "Updating agent env vars with service URLs" - gcloud run services update "$SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --update-env-vars="$env_vars" \ - --quiet 2>&1 | grep -v "Deploying\|Creating\|Routing" || true - log_info "Agent env vars updated successfully" - fi + update_agentcard_urls echo "" echo "Architecture:" @@ -462,42 +726,70 @@ case "$DEPLOY_SERVICE" in agent) echo "" show_service_info "$SERVICE_NAME" - - # Update AGENT_PROVIDER_URL (agent base URL) and MARKETPLACE_HANDLER_URL - # on the agent service. AGENT_PROVIDER_ORGANIZATION_URL is set in - # service.yaml and does NOT change per deployment. - service_url=$(gcloud run services describe "$SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --format='value(status.url)' 2>/dev/null) - handler_url=$(gcloud run services describe "$HANDLER_SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --format='value(status.url)' 2>/dev/null || echo "") - - if [[ -n "$service_url" ]]; then - env_vars="AGENT_PROVIDER_URL=$service_url" - if [[ -n "$handler_url" ]]; then - env_vars="$env_vars,MARKETPLACE_HANDLER_URL=$handler_url" - else - log_warn "Could not retrieve $HANDLER_SERVICE_NAME URL. MARKETPLACE_HANDLER_URL not set." 
- log_warn "DCR endpoints in the AgentCard will fall back to AGENT_PROVIDER_URL." - fi - log_info "Updating agent env vars with service URLs" - gcloud run services update "$SERVICE_NAME" \ - --region="$REGION" \ - --project="$PROJECT_ID" \ - --update-env-vars="$env_vars" \ - --quiet 2>&1 | grep -v "Deploying\|Creating\|Routing" || true - log_info "Agent env vars updated successfully" - fi + update_agentcard_urls echo "" echo "Test the agent:" - echo " curl \$(gcloud run services describe $SERVICE_NAME --region=$REGION --format='value(status.url)')/.well-known/agent-card.json" + echo " curl \$(gcloud run services describe $SERVICE_NAME --region=$REGION --format='value(status.url)')/.well-known/agent.json" + ;; + lb) + update_agentcard_urls ;; esac +# Show per-service load balancer info +if [[ "$ENABLE_LB_AGENT" == "true" ]]; then + echo "" + log_info "==========================================" + log_info "Agent Load Balancer" + log_info "==========================================" + + AGENT_LB_IP=$(gcloud compute addresses describe "${LB_NAME}-agent-ip" \ + --global --project="$PROJECT_ID" \ + --format='value(address)' 2>/dev/null || echo "unknown") + + AGENT_CERT_STATUS=$(gcloud compute ssl-certificates describe "${LB_NAME}-agent-cert" \ + --global --project="$PROJECT_ID" \ + --format='value(managed.status)' 2>/dev/null || echo "unknown") + + echo "" + echo " URL: https://$AGENT_DOMAIN_NAME" + echo " Static IP: $AGENT_LB_IP" + echo " SSL status: $AGENT_CERT_STATUS" + echo "" + if [[ "$AGENT_CERT_STATUS" != "ACTIVE" ]]; then + log_warn "SSL certificate is not yet active (status: $AGENT_CERT_STATUS)" + log_warn "Ensure DNS A record points $AGENT_DOMAIN_NAME → $AGENT_LB_IP" + log_warn "Provisioning can take up to 60 minutes after DNS propagation." 
+ fi +fi + +if [[ "$ENABLE_LB_HANDLER" == "true" ]]; then + echo "" + log_info "==========================================" + log_info "Handler Load Balancer" + log_info "==========================================" + + HANDLER_LB_IP=$(gcloud compute addresses describe "${LB_NAME}-handler-ip" \ + --global --project="$PROJECT_ID" \ + --format='value(address)' 2>/dev/null || echo "unknown") + + HANDLER_CERT_STATUS=$(gcloud compute ssl-certificates describe "${LB_NAME}-handler-cert" \ + --global --project="$PROJECT_ID" \ + --format='value(managed.status)' 2>/dev/null || echo "unknown") + + echo "" + echo " URL: https://$HANDLER_DOMAIN_NAME" + echo " Static IP: $HANDLER_LB_IP" + echo " SSL status: $HANDLER_CERT_STATUS" + echo "" + if [[ "$HANDLER_CERT_STATUS" != "ACTIVE" ]]; then + log_warn "SSL certificate is not yet active (status: $HANDLER_CERT_STATUS)" + log_warn "Ensure DNS A record points $HANDLER_DOMAIN_NAME → $HANDLER_LB_IP" + log_warn "Provisioning can take up to 60 minutes after DNS propagation." 
+ fi +fi + echo "" echo "View logs:" echo " gcloud run services logs read $HANDLER_SERVICE_NAME --region=$REGION --project=$PROJECT_ID" diff --git a/deploy/cloudrun/marketplace-handler.yaml b/deploy/cloudrun/marketplace-handler.yaml index f3aaf4db..ac151877 100644 --- a/deploy/cloudrun/marketplace-handler.yaml +++ b/deploy/cloudrun/marketplace-handler.yaml @@ -27,7 +27,7 @@ metadata: component: provisioning annotations: run.googleapis.com/description: "Marketplace provisioning and DCR handler for Lightspeed Agent" - run.googleapis.com/ingress: all + run.googleapis.com/ingress: internal-and-cloud-load-balancing run.googleapis.com/launch-stage: GA run.googleapis.com/minScale: "1" run.googleapis.com/maxScale: "2" diff --git a/deploy/cloudrun/service.yaml b/deploy/cloudrun/service.yaml index a14ab1c8..49d6c7c6 100644 --- a/deploy/cloudrun/service.yaml +++ b/deploy/cloudrun/service.yaml @@ -22,7 +22,7 @@ metadata: managed-by: cloud-build annotations: run.googleapis.com/description: "Red Hat Lightspeed Agent for Google Cloud - A2A-ready agent using Google ADK" - run.googleapis.com/ingress: all + run.googleapis.com/ingress: internal-and-cloud-load-balancing run.googleapis.com/launch-stage: GA run.googleapis.com/minScale: "1" run.googleapis.com/maxScale: "10" diff --git a/deploy/cloudrun/setup.sh b/deploy/cloudrun/setup.sh index 40d8ec62..1a03eeab 100755 --- a/deploy/cloudrun/setup.sh +++ b/deploy/cloudrun/setup.sh @@ -51,6 +51,13 @@ PUBSUB_INVOKER_SA="${PUBSUB_INVOKER_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" # Optional features ENABLE_MARKETPLACE="${ENABLE_MARKETPLACE:-true}" +# Per-service load balancer configuration +ENABLE_LB_AGENT="${ENABLE_LB_AGENT:-false}" +ENABLE_LB_HANDLER="${ENABLE_LB_HANDLER:-false}" +AGENT_DOMAIN_NAME="${AGENT_DOMAIN_NAME:-}" +HANDLER_DOMAIN_NAME="${HANDLER_DOMAIN_NAME:-}" +LB_NAME="${LB_NAME:-lightspeed-lb}" + # Validate required variables if [[ -z "$PROJECT_ID" ]]; then log_error "GOOGLE_CLOUD_PROJECT environment variable is required" @@ 
-58,6 +65,18 @@ if [[ -z "$PROJECT_ID" ]]; then exit 1 fi +if [[ "$ENABLE_LB_AGENT" == "true" && -z "$AGENT_DOMAIN_NAME" ]]; then + log_error "AGENT_DOMAIN_NAME is required when ENABLE_LB_AGENT=true" + echo " export AGENT_DOMAIN_NAME=agent.example.com" + exit 1 +fi + +if [[ "$ENABLE_LB_HANDLER" == "true" && -z "$HANDLER_DOMAIN_NAME" ]]; then + log_error "HANDLER_DOMAIN_NAME is required when ENABLE_LB_HANDLER=true" + echo " export HANDLER_DOMAIN_NAME=dcr.example.com" + exit 1 +fi + log_info "Setting up Cloud Run deployment for project: $PROJECT_ID" log_info "Region: $REGION" log_info "Service: $SERVICE_NAME" @@ -66,6 +85,8 @@ log_info "Handler service: $HANDLER_SERVICE_NAME" log_info "DB instance: $DB_INSTANCE_NAME" log_info "Pub/Sub invoker SA: $PUBSUB_INVOKER_NAME" log_info "Marketplace integration: $ENABLE_MARKETPLACE" +log_info "Agent load balancer: $ENABLE_LB_AGENT" +log_info "Handler load balancer: $ENABLE_LB_HANDLER" # ============================================================================= # Step 1: Enable Required APIs @@ -96,6 +117,11 @@ apis=( "vpcaccess.googleapis.com" ) +# Add Compute Engine API when any load balancer is enabled +if [[ "$ENABLE_LB_AGENT" == "true" || "$ENABLE_LB_HANDLER" == "true" ]]; then + apis+=("compute.googleapis.com") +fi + for api in "${apis[@]}"; do log_info " Enabling $api..." gcloud services enable "$api" --project="$PROJECT_ID" --quiet || true @@ -302,6 +328,43 @@ else log_info "Skipping Pub/Sub setup (ENABLE_MARKETPLACE=false)" fi +# ============================================================================= +# Step 5: Set Up Load Balancer Resources (Optional, per-service) +# ============================================================================= + +# Reserve a static IP and create a Google-managed SSL certificate for one service. 
+setup_lb_resources() { + local service_label="$1" + local domain_name="$2" + local ip_name="${LB_NAME}-${service_label}-ip" + local cert_name="${LB_NAME}-${service_label}-cert" + + log_info "Setting up ${service_label} load balancer resources..." + + if ! gcloud compute addresses describe "$ip_name" --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute addresses create "$ip_name" --global --project="$PROJECT_ID" + log_info "Static IP address '$ip_name' reserved" + else + log_info "Static IP address '$ip_name' already exists" + fi + + log_info "${service_label^} static IP: $(gcloud compute addresses describe "$ip_name" --global --project="$PROJECT_ID" --format='value(address)')" + + if ! gcloud compute ssl-certificates describe "$cert_name" --global --project="$PROJECT_ID" &>/dev/null; then + gcloud compute ssl-certificates create "$cert_name" --domains="$domain_name" --global --project="$PROJECT_ID" + log_info "Managed SSL certificate '$cert_name' created for $domain_name" + else + log_info "Managed SSL certificate '$cert_name' already exists" + fi +} + +[[ "$ENABLE_LB_AGENT" == "true" ]] && setup_lb_resources "agent" "$AGENT_DOMAIN_NAME" +[[ "$ENABLE_LB_HANDLER" == "true" ]] && setup_lb_resources "handler" "$HANDLER_DOMAIN_NAME" + +if [[ "$ENABLE_LB_AGENT" != "true" && "$ENABLE_LB_HANDLER" != "true" ]]; then + log_info "Skipping load balancer setup (no per-service LBs enabled)" +fi + # ============================================================================= # Summary # ============================================================================= @@ -315,6 +378,28 @@ echo " Runtime SA: $SERVICE_ACCOUNT" if [[ "$ENABLE_MARKETPLACE" == "true" ]]; then echo " Pub/Sub Invoker SA: $PUBSUB_INVOKER_SA" fi +if [[ "$ENABLE_LB_AGENT" == "true" ]]; then + AGENT_LB_IP=$(gcloud compute addresses describe "${LB_NAME}-agent-ip" --global --project="$PROJECT_ID" --format='value(address)') + echo "" + echo "Agent load balancer resources:" + echo " Static IP: 
$AGENT_LB_IP" + echo " SSL cert: ${LB_NAME}-agent-cert (domain: $AGENT_DOMAIN_NAME)" + echo "" + log_warn "Configure DNS for the agent before deploying:" + echo " Create an A record: $AGENT_DOMAIN_NAME → $AGENT_LB_IP" + echo " SSL provisioning requires DNS to resolve to this IP." +fi +if [[ "$ENABLE_LB_HANDLER" == "true" ]]; then + HANDLER_LB_IP=$(gcloud compute addresses describe "${LB_NAME}-handler-ip" --global --project="$PROJECT_ID" --format='value(address)') + echo "" + echo "Handler load balancer resources:" + echo " Static IP: $HANDLER_LB_IP" + echo " SSL cert: ${LB_NAME}-handler-cert (domain: $HANDLER_DOMAIN_NAME)" + echo "" + log_warn "Configure DNS for the handler before deploying:" + echo " Create an A record: $HANDLER_DOMAIN_NAME → $HANDLER_LB_IP" + echo " SSL provisioning requires DNS to resolve to this IP." +fi echo "" echo "Next steps:" echo "" From 8b3f102db6be53d748e4c53dbfe9156b6bc6b3cd Mon Sep 17 00:00:00 2001 From: Yuval Kashtan Date: Thu, 21 May 2026 19:41:06 +0000 Subject: [PATCH 2/7] fix: update agent MARKETPLACE_HANDLER_URL on --service handler deploy When deploying with --service handler and ENABLE_LB_HANDLER=true, the handler's Cloud Run ingress is restricted to internal-and-cloud-load-balancing, but the agent's MARKETPLACE_HANDLER_URL was not updated to the GCLB domain. This caused DCR requests from Gemini Enterprise to silently fail. Guards the agent env var update with a service existence check so handler-only deploys don't fail when the agent isn't deployed yet. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- deploy/cloudrun/deploy.sh | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/deploy/cloudrun/deploy.sh b/deploy/cloudrun/deploy.sh index 45239479..c0af82d4 100755 --- a/deploy/cloudrun/deploy.sh +++ b/deploy/cloudrun/deploy.sh @@ -718,6 +718,35 @@ case "$DEPLOY_SERVICE" in handler) echo "" show_service_info "$HANDLER_SERVICE_NAME" + + # When LB is enabled for the handler, the Cloud Run URL is no longer + # reachable externally — update the agent's MARKETPLACE_HANDLER_URL to + # point to the GCLB domain so the AgentCard advertises the right DCR URL. + handler_url=$(gcloud run services describe "$HANDLER_SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --format='value(status.url)' 2>/dev/null || echo "") + if [[ "$ENABLE_LB_HANDLER" == "true" ]]; then + handler_url="https://$HANDLER_DOMAIN_NAME" + fi + if [[ -n "$handler_url" ]]; then + if gcloud run services describe "$SERVICE_NAME" \ + --region="$REGION" --project="$PROJECT_ID" &>/dev/null; then + log_info "Updating agent MARKETPLACE_HANDLER_URL=$handler_url" + if gcloud run services update "$SERVICE_NAME" \ + --region="$REGION" \ + --project="$PROJECT_ID" \ + --update-env-vars="MARKETPLACE_HANDLER_URL=$handler_url" \ + --quiet 2>/dev/null; then + log_info "Agent env vars updated successfully" + else + log_warn "Could not update MARKETPLACE_HANDLER_URL. Set it manually on the agent service." + fi + else + log_warn "Agent service $SERVICE_NAME not found. Deploy the agent to set MARKETPLACE_HANDLER_URL." 
+ fi + fi + echo "" echo "The marketplace handler is ready to receive:" echo " - Pub/Sub events from Google Cloud Marketplace" From b9a4310efa3a6df7d9ed5ae8dfef65cfe7d589f1 Mon Sep 17 00:00:00 2001 From: Yuval Kashtan Date: Thu, 21 May 2026 19:41:17 +0000 Subject: [PATCH 3/7] docs: GCLB architecture, deployment guide, and ingress troubleshooting Add comprehensive documentation for the per-service GCLB and Cloud Armor WAF feature: - Architecture diagram showing independent agent and handler LBs - "Why Enable WAF" section explaining GCLB as the path to Cloud Armor - OWASP WAF rules table mapping each rule to its Top 10 category - Per-service configuration tables, DNS setup, SSL provisioning - Ingress restriction behavior for both LB-enabled and LB-disabled modes - DCR troubleshooting for ingress-hardened deployments: diagnosis commands, GCLB upgrade instructions, and MARKETPLACE_HANDLER_URL quick-fix - Scaling table with per-service values (agent vs handler) - Network diagram comments showing both with/without GCLB paths - Cross-references from api.md and configuration.md Co-Authored-By: Claude Opus 4.6 (1M context) --- deploy/cloudrun/README.md | 622 +++++++++++++++++++++++++++++++++++--- docs/api.md | 3 +- docs/architecture.md | 1 + docs/configuration.md | 16 + docs/network-diagram.md | 22 +- 5 files changed, 613 insertions(+), 51 deletions(-) diff --git a/deploy/cloudrun/README.md b/deploy/cloudrun/README.md index 735472f4..aebbe996 100644 --- a/deploy/cloudrun/README.md +++ b/deploy/cloudrun/README.md @@ -6,15 +6,18 @@ Deploy the Red Hat Lightspeed Agent for Google Cloud to Google Cloud Run for pro - [Architecture](#architecture) - [Service Accounts](#service-accounts) +- [Load Balancer (Optional)](#load-balancer-optional) + - [Cloud Armor (WAF)](#cloud-armor-waf) - [Prerequisites](#prerequisites) - [Quick Start](#quick-start) - [1. Set Environment Variables](#1-set-environment-variables) - [2. Run Setup Script](#2-run-setup-script) - [3. 
Set Up Cloud SQL Database](#3-set-up-cloud-sql-database) - [4. Redis Setup for Rate Limiting](#4-redis-setup-for-rate-limiting) - - [5. Configure Secrets](#5-configure-secrets) - - [6. Copy MCP Image to GCR](#6-copy-mcp-image-to-gcr) - - [7. Deploy](#7-deploy) + - [5. Configure Load Balancer (Optional)](#5-configure-load-balancer-optional) + - [6. Configure Secrets](#6-configure-secrets) + - [7. Copy MCP Image to GCR](#7-copy-mcp-image-to-gcr) + - [8. Deploy](#8-deploy) - [Service Configuration](#service-configuration) - [Agent Container](#agent-container) - [Using a Different LLM](#using-a-different-llm) @@ -45,11 +48,12 @@ Deploy the Red Hat Lightspeed Agent for Google Cloud to Google Cloud Run for pro - [Audit Logging](#audit-logging) - [Monitoring](#monitoring) - [Troubleshooting](#troubleshooting) + - [DCR Requests Not Reaching Marketplace Handler](#dcr-requests-not-reaching-marketplace-handler) - [Cleanup / Teardown](#cleanup--teardown) ## Architecture -The deployment consists of **two separate Cloud Run services** plus **Cloud Memorystore for Redis** (for rate limiting): +The deployment consists of **two separate Cloud Run services** plus **Cloud Memorystore for Redis** (for rate limiting), with optional **per-service Google Cloud Load Balancers (GCLB)** for SSL termination, DDoS protection, and WAF: ``` Google Cloud Marketplace @@ -60,39 +64,36 @@ The deployment consists of **two separate Cloud Run services** plus **Cloud Memo ┌──────────────────────┐ ┌──────────────────────────────────┐ │ Pub/Sub (Events) │ │ Gemini Enterprise (DCR) │ └──────────┬───────────┘ └──────────────────┬───────────────┘ + │ (internal) │ (external) │ │ - ▼ ▼ -┌─────────────────────────────────────────────────────────────────────────────────┐ -│ Marketplace Handler Service (Port 8001) │ -│ ─────────────────────────────────────── │ -│ - Always running (minScale=1) to receive Pub/Sub events │ -│ - Handles entitlement approvals via Procurement API (filtered by product) │ -│ - Handles 
DCR requests (creates OAuth clients in Red Hat SSO) │ -│ - Stores data in PostgreSQL │ -└──────────┬──────────────────────────────────────────────────────────────────────┘ - │ │ - │ Shared PostgreSQL Database │ DCR (create OAuth clients) - ▼ ▼ -┌──────────────────────────────────────────────┐ ┌──────────────────────┐ -│ Lightspeed Agent Service (Port 8000) │ │ Red Hat SSO │ -│ ───────────────────────────────────── │ │ (GMA SSO API) │ -│ ┌──────────────────┐ ┌──────────────────┐ │ │ │ -│ │ Lightspeed Agent │ │ Lightspeed MCP │ │ │ Production: │ -│ │ │ │ Server (8081) │ │ │ sso.redhat.com │ -│ │ - LLM (config.) │ │ │ │ │ │ -│ │ - A2A protocol │◄-►│ - Advisor tools │ │ │ │ -│ │ - OAuth 2.0 │ │ - Inventory tools│ │ │ │ -│ │ │ │ - Vuln. tools │ │ │ │ -│ └──────────────────┘ └────────┬─────────┘ │ └──────────────────────┘ -│ │ │ -└──────────────────────────────────┼───────────┘ - │ - ▼ - ┌──────────────────┐ - │console.redhat.com│ - │ (Insights APIs) │ - └──────────────────┘ -``` + │ ┌──────────────────────────┐ ┌──────────────┴─────────────┐ + │ │ Handler LB (Optional) │ │ Agent LB (Optional) │ + │ │ ────────────────────── │ │ ──────────────────── │ + │ │ - SSL (handler cert) │ │ - SSL (agent cert) │ + │ │ - Cloud Armor (handler) │ │ - Cloud Armor (agent) │ + │ │ - Default backend only │ │ - Default backend only │ + │ └────────────┬─────────────┘ └────────────┬───────────────┘ + │ │ │ + ▼ ▼ ▼ + ┌──────────────────────────────────┐ ┌──────────────────────────────────┐ + │ Marketplace Handler (Port 8001) │ │ Lightspeed Agent (Port 8000) │ + │ ────────────────────────────── │ │ ────────────────────────────── │ + │ - Always running (minScale=1) │ │ ┌──────────────┐ ┌───────────┐ │ + │ - Pub/Sub events (internal) │ │ │ Lightspeed │ │ MCP │ │ + │ - DCR requests (via LB) │ │ │ Agent │ │ Server │ │ + │ - Entitlement approval │ │ │ (Gemini) │◄►│ (8081) │ │ + │ │ │ │ A2A + OAuth │ │ Advisor │ │ + └───────┬──────────────────┬───────┘ │ └──────────────┘ └─────┬─────┘ │ + │ │ 
└──────────────────────────┼───────┘ + Shared DB│ │ DCR │ + ▼ ▼ ▼ + ┌────────────┐ ┌──────────────┐ ┌──────────────────┐ + │ PostgreSQL │ │ Red Hat SSO │ │console.redhat.com│ + └────────────┘ │ (GMA SSO API)│ │ (Insights APIs) │ + └──────────────┘ └──────────────────┘ +``` + +Each service can have its own independent load balancer. When a service's LB is enabled (`ENABLE_LB_AGENT=true` or `ENABLE_LB_HANDLER=true`), Cloud Run ingress for that service is restricted to `internal-and-cloud-load-balancing`, meaning external traffic **must** go through its GCLB. Without a LB, external traffic reaches the Cloud Run service directly via its Cloud Run URL. Pub/Sub traffic is always internal Google Cloud traffic and reaches the handler directly, bypassing any load balancer. ### Service Responsibilities @@ -126,6 +127,232 @@ The deployment uses **two separate service accounts** following the principle of Both are created automatically by `setup.sh`. The Pub/Sub Invoker SA is only created when `ENABLE_MARKETPLACE=true` (the default). +## Load Balancer (Optional) + +The deployment scripts can create **independent per-service Google Cloud Load Balancers (GCLB)** — one for the agent and one for the marketplace handler. Each LB is fully self-contained with its own static IP, SSL certificate, Cloud Armor policy, and domain. This is optional — without LBs, services are accessed directly via their Cloud Run URLs. + +### What GCLB Provides + +- **SSL termination** with Google-managed certificates for your custom domains +- **DDoS protection** via Cloud Armor +- **WAF capabilities** (Web Application Firewall) +- **Per-service isolation** — each service has its own independent LB, blast radius is contained +- **Independent WAF policies** — tailor Cloud Armor rules per service (e.g., stricter rules for the agent) + +### Per-Service Architecture + +Each service that has an LB enabled gets its own independent set of resources — there is no shared state between the agent and handler LBs. 
Each LB has a simple default backend (no path-based routing needed since each fronts a single service). + +Pub/Sub events are internal Google Cloud traffic and reach the marketplace handler directly, bypassing the load balancer. + +### Ingress Restriction + +`deploy.sh` manages Cloud Run ingress for each service based on its LB configuration: + +- **LB enabled** → ingress is set to `internal-and-cloud-load-balancing`. External traffic **must** go through the service's GCLB (direct Cloud Run URLs are blocked from the internet). +- **LB not enabled** → ingress is set to `all`. External traffic reaches the service directly via its Cloud Run URL. + +In both cases: + +- Internal Google Cloud traffic (e.g., Pub/Sub to handler) always reaches services directly +- Health checks from the load balancer are allowed +- Each service's ingress is managed independently — enabling the agent LB does not affect the handler's ingress, and vice versa + +> **Note:** The YAML configs (`service.yaml`, `marketplace-handler.yaml`) default to `internal-and-cloud-load-balancing`. `deploy.sh` overrides this to `all` for any service without an LB. If you deploy using `gcloud run services replace` directly (bypassing `deploy.sh`), set `run.googleapis.com/ingress: all` in the YAML manually when not using a GCLB. 
+ +### Resources Created + +Each enabled LB creates the following resources (all prefixed with `LB_NAME` and the service label, default prefix: `lightspeed-lb`): + +**Agent LB resources** (when `ENABLE_LB_AGENT=true`): + +| Resource | Name | Description | +|----------|------|-------------| +| Global static IP | `{LB_NAME}-agent-ip` | External IP address for agent DNS | +| Serverless NEG | `{LB_NAME}-agent-neg` | Network endpoint group for agent service | +| Backend service | `{LB_NAME}-agent-backend` | Backend for agent NEG | +| URL map | `{LB_NAME}-agent-url-map` | Default backend (agent) | +| SSL certificate | `{LB_NAME}-agent-cert` | Google-managed SSL certificate for agent domain | +| HTTPS target proxy | `{LB_NAME}-agent-https-proxy` | Terminates SSL and forwards to URL map | +| Global forwarding rule | `{LB_NAME}-agent-forwarding-rule` | Maps static IP:443 to HTTPS proxy | + +**Handler LB resources** (when `ENABLE_LB_HANDLER=true`): + +| Resource | Name | Description | +|----------|------|-------------| +| Global static IP | `{LB_NAME}-handler-ip` | External IP address for handler DNS | +| Serverless NEG | `{LB_NAME}-handler-neg` | Network endpoint group for handler service | +| Backend service | `{LB_NAME}-handler-backend` | Backend for handler NEG | +| URL map | `{LB_NAME}-handler-url-map` | Default backend (handler) | +| SSL certificate | `{LB_NAME}-handler-cert` | Google-managed SSL certificate for handler domain | +| HTTPS target proxy | `{LB_NAME}-handler-https-proxy` | Terminates SSL and forwards to URL map | +| Global forwarding rule | `{LB_NAME}-handler-forwarding-rule` | Maps static IP:443 to HTTPS proxy | + +### Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `ENABLE_LB_AGENT` | `false` | Enable GCLB for the agent service | +| `AGENT_DOMAIN_NAME` | (required when agent LB enabled) | Domain for the agent's Google-managed SSL certificate (e.g., `agent.example.com`) | +| `ENABLE_LB_HANDLER` | `false` | 
Enable GCLB for the marketplace handler service | +| `HANDLER_DOMAIN_NAME` | (required when handler LB enabled) | Domain for the handler's Google-managed SSL certificate (e.g., `dcr.example.com`) | +| `ENABLE_CLOUD_ARMOR_AGENT` | `false` | Enable Cloud Armor WAF for the agent LB (requires `ENABLE_LB_AGENT=true`) | +| `ENABLE_CLOUD_ARMOR_HANDLER` | `false` | Enable Cloud Armor WAF for the handler LB (requires `ENABLE_LB_HANDLER=true`) | +| `LB_NAME` | `lightspeed-lb` | Prefix for all load balancer resource names | + +### DNS Setup + +After `setup.sh` reserves the static IPs, create DNS A records for each enabled LB: + +1. Get the static IP addresses: + ```bash + # Agent LB IP (if ENABLE_LB_AGENT=true) + gcloud compute addresses describe ${LB_NAME:-lightspeed-lb}-agent-ip \ + --global \ + --project=$GOOGLE_CLOUD_PROJECT \ + --format='value(address)' + + # Handler LB IP (if ENABLE_LB_HANDLER=true) + gcloud compute addresses describe ${LB_NAME:-lightspeed-lb}-handler-ip \ + --global \ + --project=$GOOGLE_CLOUD_PROJECT \ + --format='value(address)' + ``` + +2. Create A records in your DNS provider: + ``` + agent.example.com. A AGENT_STATIC_IP + dcr.example.com. A HANDLER_STATIC_IP + ``` + +3. Verify DNS propagation: + ```bash + dig +short $AGENT_DOMAIN_NAME + dig +short $HANDLER_DOMAIN_NAME + ``` + +### SSL Certificate Provisioning + +Google-managed SSL certificates require each domain to resolve to its respective static IP before provisioning begins. Certificate provisioning typically takes **15 to 60 minutes** after DNS is correctly configured. 
+ +Check certificate status: + +```bash +# Agent certificate +gcloud compute ssl-certificates describe ${LB_NAME:-lightspeed-lb}-agent-cert \ + --global \ + --project=$GOOGLE_CLOUD_PROJECT \ + --format='value(managed.status)' + +# Handler certificate +gcloud compute ssl-certificates describe ${LB_NAME:-lightspeed-lb}-handler-cert \ + --global \ + --project=$GOOGLE_CLOUD_PROJECT \ + --format='value(managed.status)' +``` + +Each certificate goes through these states: `PROVISIONING` → `ACTIVE`. HTTPS traffic will not work for a service until its certificate reaches `ACTIVE` status. + +### Cloud Armor (WAF) + +Each service can have its own independent Cloud Armor security policy. Set `ENABLE_CLOUD_ARMOR_AGENT=true` (requires `ENABLE_LB_AGENT=true`) and/or `ENABLE_CLOUD_ARMOR_HANDLER=true` (requires `ENABLE_LB_HANDLER=true`) to create **Google Cloud Armor** security policies. Cloud Armor provides: + +- **Web Application Firewall (WAF)** with preconfigured OWASP ModSecurity Core Rule Set (CRS) rules +- **DDoS mitigation** at the edge via Google's global infrastructure +- **Layer 7 filtering** to block common web attacks before they reach your services +- **Independent policies per service** — tailor WAF rules for each service's traffic patterns + +#### Why Enable WAF + +Without a GCLB in front of a Cloud Run service, there is no WAF layer — all HTTP traffic reaches your application directly. Cloud Armor WAF is only available through GCLB, so enabling a per-service load balancer is the only way to get WAF protection on Cloud Run. + +WAF enforcement applies the [OWASP ModSecurity Core Rule Set (CRS)](https://owasp.org/www-project-modsecurity-core-rule-set/) at the edge, blocking malicious requests **before they reach your application**. This provides defense-in-depth: even if the application has an undiscovered vulnerability, the WAF catches common exploit patterns at the network edge. + +Per-service policies allow independent tuning for each service's traffic profile. 
For example, the agent processes free-form A2A JSON-RPC payloads where users may ask about SQL queries or include HTML-like text — aggressive SQL injection rules could cause false positives. The marketplace handler receives structured DCR JSON from Gemini Enterprise — stricter rules are appropriate. Independent policies let you tune each service without compromise. + +#### Enabling Cloud Armor + +```bash +# Enable LBs with Cloud Armor for both services +export ENABLE_LB_AGENT=true +export AGENT_DOMAIN_NAME="agent.example.com" +export ENABLE_CLOUD_ARMOR_AGENT=true + +export ENABLE_LB_HANDLER=true +export HANDLER_DOMAIN_NAME="dcr.example.com" +export ENABLE_CLOUD_ARMOR_HANDLER=true + +./deploy/cloudrun/deploy.sh +``` + +#### Preconfigured WAF Rules + +Each security policy includes the following OWASP ModSecurity CRS rules, each configured to deny matching requests with HTTP 403. The rules map to [OWASP Top 10 (2021)](https://owasp.org/Top10/) categories: + +| Priority | Rule | OWASP Category | What It Blocks | +|----------|------|----------------|----------------| +| 1000 | `sqli-v33-stable` | A03:2021 Injection | SQL injection attempts in query parameters, headers, and request body. Detects patterns like `' OR 1=1`, `UNION SELECT`, and encoded variants. | +| 1100 | `xss-v33-stable` | A03:2021 Injection | Cross-site scripting via `