From 90c28b96e537966bee1d21927604bc00fda2b4e0 Mon Sep 17 00:00:00 2001 From: Arthas <15215604969@163.com> Date: Sat, 30 May 2026 21:44:46 +0800 Subject: [PATCH] feat: add Gemini OpenAI compatibility --- .gitignore | 1 + .../internal/handler/admin/account_data.go | 24 + .../admin/account_data_handler_test.go | 73 ++ backend/internal/handler/endpoint.go | 7 +- backend/internal/handler/endpoint_test.go | 5 + .../gemini_openai_compatible_handler.go | 406 ++++++ .../gemini_openai_compatible_handler_test.go | 94 ++ .../pkg/apicompat/anthropic_responses_test.go | 43 + .../chatcompletions_responses_test.go | 65 + .../apicompat/chatcompletions_to_responses.go | 45 +- .../responses_to_anthropic_request.go | 16 + backend/internal/pkg/apicompat/types.go | 28 +- backend/internal/server/routes/gateway.go | 34 + .../internal/server/routes/gateway_test.go | 47 + .../service/gemini_messages_compat_service.go | 15 + .../gemini_messages_compat_service_test.go | 91 ++ .../service/gemini_openai_embeddings.go | 352 +++++ .../service/gemini_openai_embeddings_test.go | 184 +++ .../internal/service/gemini_openai_images.go | 375 ++++++ .../service/gemini_openai_images_test.go | 193 +++ .../internal/service/pricing_service_test.go | 6 +- ...gemini-openai-compatible-implementation.md | 1158 +++++++++++++++++ ...6-05-29-gemini-openai-compatible-design.md | 265 ++++ frontend/src/components/keys/UseKeyModal.vue | 62 +- .../keys/__tests__/UseKeyModal.spec.ts | 58 + frontend/src/i18n/locales/en.ts | 2 + frontend/src/i18n/locales/zh.ts | 2 + 27 files changed, 3634 insertions(+), 17 deletions(-) create mode 100644 backend/internal/handler/gemini_openai_compatible_handler.go create mode 100644 backend/internal/handler/gemini_openai_compatible_handler_test.go create mode 100644 backend/internal/service/gemini_openai_embeddings.go create mode 100644 backend/internal/service/gemini_openai_embeddings_test.go create mode 100644 backend/internal/service/gemini_openai_images.go create mode 100644 
backend/internal/service/gemini_openai_images_test.go create mode 100644 docs/superpowers/plans/2026-05-29-gemini-openai-compatible-implementation.md create mode 100644 docs/superpowers/specs/2026-05-29-gemini-openai-compatible-design.md diff --git a/.gitignore b/.gitignore index cf251f0715a..0fe4b810a8c 100644 --- a/.gitignore +++ b/.gitignore @@ -133,6 +133,7 @@ docs/* !docs/ADMIN_PAYMENT_INTEGRATION_API.md .serena/ .codex/ +.worktrees/ frontend/coverage/ aicodex output/ diff --git a/backend/internal/handler/admin/account_data.go b/backend/internal/handler/admin/account_data.go index 50beadf68e6..9761522efc6 100644 --- a/backend/internal/handler/admin/account_data.go +++ b/backend/internal/handler/admin/account_data.go @@ -2,6 +2,7 @@ package admin import ( "context" + "encoding/json" "errors" "fmt" "strconv" @@ -66,6 +67,29 @@ type DataImportRequest struct { SkipDefaultGroupBind *bool `json:"skip_default_group_bind"` } +func (r *DataImportRequest) UnmarshalJSON(data []byte) error { + var wrapped struct { + Data DataPayload `json:"data"` + SkipDefaultGroupBind *bool `json:"skip_default_group_bind"` + } + if err := json.Unmarshal(data, &wrapped); err != nil { + return err + } + if wrapped.Data.Accounts != nil || wrapped.Data.Proxies != nil || wrapped.Data.ExportedAt != "" || wrapped.Data.Type != "" || wrapped.Data.Version != 0 { + r.Data = wrapped.Data + r.SkipDefaultGroupBind = wrapped.SkipDefaultGroupBind + return nil + } + + var payload DataPayload + if err := json.Unmarshal(data, &payload); err != nil { + return err + } + r.Data = payload + r.SkipDefaultGroupBind = wrapped.SkipDefaultGroupBind + return nil +} + type DataImportResult struct { ProxyCreated int `json:"proxy_created"` ProxyReused int `json:"proxy_reused"` diff --git a/backend/internal/handler/admin/account_data_handler_test.go b/backend/internal/handler/admin/account_data_handler_test.go index 5793983cba3..ff486ef7d55 100644 --- a/backend/internal/handler/admin/account_data_handler_test.go +++ 
b/backend/internal/handler/admin/account_data_handler_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "net/http" "net/http/httptest" + "os" "testing" "github.com/Wei-Shaw/sub2api/internal/service" @@ -275,3 +276,75 @@ func TestImportDataReusesProxyAndSkipsDefaultGroup(t *testing.T) { require.Len(t, adminSvc.createdAccounts, 1) require.True(t, adminSvc.createdAccounts[0].SkipDefaultGroupBind) } + +func TestImportDataAcceptsTopLevelExportPayload(t *testing.T) { + router, adminSvc := setupAccountDataRouter() + + dataPayload := map[string]any{ + "exported_at": "2026-05-29T10:09:22Z", + "proxies": []map[string]any{}, + "accounts": []map[string]any{ + { + "name": "agora1", + "platform": service.PlatformGemini, + "type": service.AccountTypeOAuth, + "concurrency": 0, + "priority": 0, + "auto_pause_on_expired": true, + "credentials": map[string]any{ + "_token_version": 2, + "access_token": "access-token", + "refresh_token": "refresh-token", + "token_type": "Bearer", + "expires_at": float64(1790000000), + "oauth_type": "gemini", + "scope": "https://www.googleapis.com/auth/cloud-platform", + "tier_id": "free-tier", + "project_id": "gemini-project", + }, + }, + }, + } + + body, _ := json.Marshal(dataPayload) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/v1/admin/accounts/data", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(rec, req) + require.Equal(t, http.StatusOK, rec.Code) + + require.Len(t, adminSvc.createdAccounts, 1) + created := adminSvc.createdAccounts[0] + require.Equal(t, "agora1", created.Name) + require.Equal(t, service.PlatformGemini, created.Platform) + require.Equal(t, service.AccountTypeOAuth, created.Type) + require.True(t, created.SkipDefaultGroupBind) + require.Equal(t, "refresh-token", created.Credentials["refresh_token"]) + require.Equal(t, "gemini-project", created.Credentials["project_id"]) +} + +func TestImportDataAcceptsLocalGeminiExportFixture(t *testing.T) { + 
fixturePath := os.Getenv("SUB2API_ACCOUNT_IMPORT_FIXTURE") + if fixturePath == "" { + t.Skip("SUB2API_ACCOUNT_IMPORT_FIXTURE is not set") + } + + body, err := os.ReadFile(fixturePath) + require.NoError(t, err) + + router, adminSvc := setupAccountDataRouter() + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/api/v1/admin/accounts/data", bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + router.ServeHTTP(rec, req) + require.Equal(t, http.StatusOK, rec.Code) + + require.Len(t, adminSvc.createdAccounts, 10) + for _, created := range adminSvc.createdAccounts { + require.Equal(t, service.PlatformGemini, created.Platform) + require.Equal(t, service.AccountTypeOAuth, created.Type) + require.True(t, created.SkipDefaultGroupBind) + require.NotEmpty(t, created.Credentials["refresh_token"]) + require.NotEmpty(t, created.Credentials["project_id"]) + } +} diff --git a/backend/internal/handler/endpoint.go b/backend/internal/handler/endpoint.go index 0d6f4b3cf64..2ce7165ebd9 100644 --- a/backend/internal/handler/endpoint.go +++ b/backend/internal/handler/endpoint.go @@ -43,9 +43,9 @@ const ( func NormalizeInboundEndpoint(path string) string { path = strings.TrimSpace(path) switch { - case strings.Contains(path, EndpointEmbeddings): + case strings.Contains(path, EndpointEmbeddings) || strings.Contains(path, "/embeddings"): return EndpointEmbeddings - case strings.Contains(path, EndpointChatCompletions): + case strings.Contains(path, EndpointChatCompletions) || strings.Contains(path, "/chat/completions"): return EndpointChatCompletions case strings.Contains(path, EndpointMessages): return EndpointMessages @@ -92,6 +92,9 @@ func DeriveUpstreamEndpoint(inbound, rawRequestPath, platform string) string { return EndpointMessages case service.PlatformGemini: + if inbound == EndpointEmbeddings || inbound == EndpointImagesGenerations || inbound == EndpointImagesEdits { + return inbound + } return EndpointGeminiModels case 
service.PlatformAntigravity: diff --git a/backend/internal/handler/endpoint_test.go b/backend/internal/handler/endpoint_test.go index 42b6d6e71b9..b8110d0eaba 100644 --- a/backend/internal/handler/endpoint_test.go +++ b/backend/internal/handler/endpoint_test.go @@ -29,6 +29,9 @@ func TestNormalizeInboundEndpoint(t *testing.T) { {"/v1/images/generations", EndpointImagesGenerations}, {"/v1/images/edits", EndpointImagesEdits}, {"/v1beta/models", EndpointGeminiModels}, + {"/v1beta/openai/chat/completions", EndpointChatCompletions}, + {"/v1beta/openai/embeddings", EndpointEmbeddings}, + {"/v1beta/openai/images/generations", EndpointImagesGenerations}, // Prefixed paths (antigravity, openai). {"/antigravity/v1/messages", EndpointMessages}, @@ -71,6 +74,8 @@ func TestDeriveUpstreamEndpoint(t *testing.T) { // Gemini. {"gemini models", EndpointGeminiModels, "/v1beta/models/gemini:gen", service.PlatformGemini, EndpointGeminiModels}, + {"gemini openai embeddings", EndpointEmbeddings, "/v1beta/openai/embeddings", service.PlatformGemini, EndpointEmbeddings}, + {"gemini openai image generations", EndpointImagesGenerations, "/v1beta/openai/images/generations", service.PlatformGemini, EndpointImagesGenerations}, // OpenAI — always /v1/responses. 
{"openai responses root", EndpointResponses, "/v1/responses", service.PlatformOpenAI, EndpointResponses}, diff --git a/backend/internal/handler/gemini_openai_compatible_handler.go b/backend/internal/handler/gemini_openai_compatible_handler.go new file mode 100644 index 00000000000..9f8877e8fd8 --- /dev/null +++ b/backend/internal/handler/gemini_openai_compatible_handler.go @@ -0,0 +1,406 @@ +package handler + +import ( + "context" + "errors" + "net/http" + "strconv" + "strings" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/gemini" + pkghttputil "github.com/Wei-Shaw/sub2api/internal/pkg/httputil" + "github.com/Wei-Shaw/sub2api/internal/pkg/ip" + "github.com/Wei-Shaw/sub2api/internal/pkg/logger" + servermiddleware "github.com/Wei-Shaw/sub2api/internal/server/middleware" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" + "github.com/tidwall/gjson" + "go.uber.org/zap" +) + +type openAICompatModelList struct { + Object string `json:"object"` + Data []openAICompatModelObject `json:"data"` +} + +type openAICompatModelObject struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` +} + +type geminiOpenAICompatibleUnaryOptions struct { + Component string + RequestType int16 + BeforeForward func(*gin.Context, *service.APIKey, servermiddleware.AuthSubject, string, []byte) bool + Forward func(context.Context, *gin.Context, *service.Account, []byte) (*service.ForwardResult, error) +} + +func ensureGeminiOpenAICompatibleGroup(c *gin.Context) bool { + apiKey, ok := servermiddleware.GetAPIKeyFromContext(c) + if !ok || apiKey == nil || apiKey.Group == nil || apiKey.Group.Platform != service.PlatformGemini { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "The /v1beta/openai compatibility endpoint requires a Gemini group") + return false + } + return true +} + +func geminiOpenAICompatError(c *gin.Context, status int, errType string, message 
string) { + c.JSON(status, gin.H{ + "error": gin.H{ + "type": errType, + "message": message, + }, + }) + c.Abort() +} + +func geminiModelNameToOpenAIID(name string) string { + name = strings.TrimSpace(name) + name = strings.TrimPrefix(name, "models/") + return name +} + +func geminiModelsToOpenAIModelList(src gemini.ModelsListResponse) openAICompatModelList { + out := openAICompatModelList{ + Object: "list", + Data: make([]openAICompatModelObject, 0, len(src.Models)), + } + for _, model := range src.Models { + id := geminiModelNameToOpenAIID(model.Name) + if id == "" { + continue + } + out.Data = append(out.Data, openAICompatModelObject{ + ID: id, + Object: "model", + Created: 0, + OwnedBy: "google", + }) + } + return out +} + +func geminiModelToOpenAIModelObject(model string) openAICompatModelObject { + return openAICompatModelObject{ + ID: geminiModelNameToOpenAIID(model), + Object: "model", + Created: 0, + OwnedBy: "google", + } +} + +func (h *GatewayHandler) GeminiOpenAICompatibleModels(c *gin.Context) { + if !ensureGeminiOpenAICompatibleGroup(c) { + return + } + c.JSON(http.StatusOK, geminiModelsToOpenAIModelList(gemini.FallbackModelsList())) +} + +func (h *GatewayHandler) GeminiOpenAICompatibleGetModel(c *gin.Context) { + if !ensureGeminiOpenAICompatibleGroup(c) { + return + } + model := strings.TrimSpace(c.Param("model")) + if model == "" { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "model is required") + return + } + c.JSON(http.StatusOK, geminiModelToOpenAIModelObject(model)) +} + +func (h *GatewayHandler) GeminiOpenAICompatibleEmbeddings(c *gin.Context) { + h.handleGeminiOpenAICompatibleUnary(c, geminiOpenAICompatibleUnaryOptions{ + Component: "handler.gemini_openai.embeddings", + RequestType: int16(service.RequestTypeSync), + Forward: func(ctx context.Context, c *gin.Context, account *service.Account, body []byte) (*service.ForwardResult, error) { + return h.geminiCompatService.ForwardOpenAICompatibleEmbeddings(ctx, c, 
account, body) + }, + }) +} + +func (h *GatewayHandler) GeminiOpenAICompatibleImagesGenerations(c *gin.Context) { + h.handleGeminiOpenAICompatibleUnary(c, geminiOpenAICompatibleUnaryOptions{ + Component: "handler.gemini_openai.images", + RequestType: int16(service.RequestTypeSync), + BeforeForward: func(c *gin.Context, apiKey *service.APIKey, subject servermiddleware.AuthSubject, model string, body []byte) bool { + if !service.GroupAllowsImageGeneration(apiKey.Group) { + geminiOpenAICompatError(c, http.StatusForbidden, "permission_error", service.ImageGenerationPermissionMessage()) + return false + } + reqLog := requestLogger( + c, + "handler.gemini_openai.images", + zap.Int64("user_id", subject.UserID), + zap.Int64("api_key_id", apiKey.ID), + zap.Any("group_id", apiKey.GroupID), + zap.String("model", model), + ) + if decision := h.checkContentModeration(c, reqLog, apiKey, subject, service.ContentModerationProtocolOpenAIImages, model, body); decision != nil && decision.Blocked { + geminiOpenAICompatError(c, contentModerationStatus(decision), contentModerationErrorCode(decision), decision.Message) + return false + } + return true + }, + Forward: func(ctx context.Context, c *gin.Context, account *service.Account, body []byte) (*service.ForwardResult, error) { + return h.geminiCompatService.ForwardOpenAICompatibleImagesGenerations(ctx, c, account, body) + }, + }) +} + +func (h *GatewayHandler) GeminiOpenAICompatibleUnsupported(c *gin.Context) { + geminiOpenAICompatError(c, http.StatusNotFound, "invalid_request_error", "Unsupported endpoint for Gemini OpenAI compatibility") +} + +func (h *GatewayHandler) handleGeminiOpenAICompatibleUnary(c *gin.Context, opts geminiOpenAICompatibleUnaryOptions) { + requestStart := time.Now() + if !ensureGeminiOpenAICompatibleGroup(c) { + return + } + + apiKey, ok := servermiddleware.GetAPIKeyFromContext(c) + if !ok || apiKey == nil { + geminiOpenAICompatError(c, http.StatusUnauthorized, "authentication_error", "Invalid API key") + 
return + } + subject, ok := servermiddleware.GetAuthSubjectFromContext(c) + if !ok { + geminiOpenAICompatError(c, http.StatusInternalServerError, "api_error", "User context not found") + return + } + reqLog := requestLogger( + c, + opts.Component, + zap.Int64("user_id", subject.UserID), + zap.Int64("api_key_id", apiKey.ID), + zap.Any("group_id", apiKey.GroupID), + ) + + body, err := pkghttputil.ReadRequestBodyWithPrealloc(c.Request) + if err != nil { + if maxErr, ok := extractMaxBytesError(err); ok { + geminiOpenAICompatError(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit)) + return + } + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body") + return + } + if len(body) == 0 { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty") + return + } + if !gjson.ValidBytes(body) { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body") + return + } + + modelResult := gjson.GetBytes(body, "model") + if !modelResult.Exists() || modelResult.Type != gjson.String || strings.TrimSpace(modelResult.String()) == "" { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "model is required") + return + } + reqModel := modelResult.String() + reqLog = reqLog.With(zap.String("model", reqModel)) + setOpsRequestContext(c, reqModel, false) + setOpsEndpointContext(c, "", opts.RequestType) + + if opts.BeforeForward != nil && !opts.BeforeForward(c, apiKey, subject, reqModel, body) { + return + } + + if h.geminiCompatService == nil { + geminiOpenAICompatError(c, http.StatusBadGateway, "upstream_error", "Gemini compatibility service is not configured") + return + } + + channelMapping, _ := h.gatewayService.ResolveChannelMappingAndRestrict(c.Request.Context(), apiKey.GroupID, reqModel) + + if h.errorPassthroughService != nil { + service.BindErrorPassthroughService(c, 
h.errorPassthroughService) + } + subscription, _ := servermiddleware.GetSubscriptionFromContext(c) + service.SetOpsLatencyMs(c, service.OpsAuthLatencyMsKey, time.Since(requestStart).Milliseconds()) + + streamStarted := false + maxWait := service.CalculateMaxWait(subject.Concurrency) + canWait, err := h.concurrencyHelper.IncrementWaitCount(c.Request.Context(), subject.UserID, maxWait) + waitCounted := false + if err != nil { + reqLog.Warn("gemini_openai.user_wait_counter_increment_failed", zap.Error(err)) + } else if !canWait { + geminiOpenAICompatError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later") + return + } + if err == nil && canWait { + waitCounted = true + } + defer func() { + if waitCounted { + h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID) + } + }() + + userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, false, &streamStarted) + if err != nil { + reqLog.Warn("gemini_openai.user_slot_acquire_failed", zap.Error(err)) + h.handleConcurrencyError(c, err, "user", streamStarted) + return + } + if waitCounted { + h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID) + waitCounted = false + } + userReleaseFunc = wrapReleaseOnDone(c.Request.Context(), userReleaseFunc) + if userReleaseFunc != nil { + defer userReleaseFunc() + } + + if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription, service.QuotaPlatform(c.Request.Context(), apiKey)); err != nil { + reqLog.Info("gemini_openai.billing_check_failed", zap.Error(err)) + status, code, message, retryAfter := billingErrorDetails(err) + if retryAfter > 0 { + c.Header("Retry-After", strconv.Itoa(retryAfter)) + } + geminiOpenAICompatError(c, status, code, message) + return + } + + fs := NewFailoverState(h.maxAccountSwitchesGemini, false) + routingStart := time.Now() + + for { + selection, err := 
h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, "", reqModel, fs.FailedAccountIDs, "", int64(0)) + if err != nil { + if len(fs.FailedAccountIDs) == 0 { + markOpsRoutingCapacityLimitedIfNoAvailable(c, err) + geminiOpenAICompatError(c, http.StatusServiceUnavailable, "api_error", "No available Gemini accounts: "+err.Error()) + return + } + action := fs.HandleSelectionExhausted(c.Request.Context()) + switch action { + case FailoverContinue: + continue + case FailoverCanceled: + return + default: + if fs.LastFailoverErr != nil { + h.handleCCFailoverExhausted(c, fs.LastFailoverErr, streamStarted) + } else { + geminiOpenAICompatError(c, http.StatusBadGateway, "server_error", "All available Gemini accounts exhausted") + } + return + } + } + account := selection.Account + setOpsSelectedAccount(c, account.ID, account.Platform) + + accountReleaseFunc := selection.ReleaseFunc + if !selection.Acquired { + if selection.WaitPlan == nil { + markOpsRoutingCapacityLimited(c) + geminiOpenAICompatError(c, http.StatusServiceUnavailable, "api_error", "No available Gemini accounts") + return + } + accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout( + c, + account.ID, + selection.WaitPlan.MaxConcurrency, + selection.WaitPlan.Timeout, + false, + &streamStarted, + ) + if err != nil { + reqLog.Warn("gemini_openai.account_slot_acquire_failed", zap.Int64("account_id", account.ID), zap.Error(err)) + h.handleConcurrencyError(c, err, "account", streamStarted) + return + } + } + accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc) + + if account.Platform != service.PlatformGemini { + if accountReleaseFunc != nil { + accountReleaseFunc() + } + fs.FailedAccountIDs[account.ID] = struct{}{} + continue + } + + service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds()) + forwardBody := body + if channelMapping.Mapped { + forwardBody = 
h.gatewayService.ReplaceModelInBody(body, channelMapping.MappedModel) + } + writerSizeBeforeForward := c.Writer.Size() + forwardStart := time.Now() + result, err := opts.Forward(c.Request.Context(), c, account, forwardBody) + forwardDurationMs := time.Since(forwardStart).Milliseconds() + upstreamLatencyMs, _ := getContextInt64(c, service.OpsUpstreamLatencyMsKey) + responseLatencyMs := forwardDurationMs + if upstreamLatencyMs > 0 && forwardDurationMs > upstreamLatencyMs { + responseLatencyMs = forwardDurationMs - upstreamLatencyMs + } + service.SetOpsLatencyMs(c, service.OpsResponseLatencyMsKey, responseLatencyMs) + if accountReleaseFunc != nil { + accountReleaseFunc() + } + + if err != nil { + var failoverErr *service.UpstreamFailoverError + if errors.As(err, &failoverErr) { + if c.Writer.Size() != writerSizeBeforeForward { + h.handleCCFailoverExhausted(c, failoverErr, true) + return + } + action := fs.HandleFailoverError(c.Request.Context(), h.gatewayService, account.ID, account.Platform, failoverErr) + switch action { + case FailoverContinue: + continue + case FailoverExhausted: + h.handleCCFailoverExhausted(c, fs.LastFailoverErr, streamStarted) + return + case FailoverCanceled: + return + } + } + h.ensureForwardErrorResponse(c, streamStarted) + reqLog.Error("gemini_openai.forward_failed", zap.Int64("account_id", account.ID), zap.Error(err)) + return + } + + userAgent := c.GetHeader("User-Agent") + clientIP := ip.GetClientIP(c) + requestPayloadHash := service.HashUsageRequestPayload(body) + inboundEndpoint := GetInboundEndpoint(c) + upstreamEndpoint := GetUpstreamEndpoint(c, account.Platform) + quotaPlatform := service.QuotaPlatform(c.Request.Context(), apiKey) + h.submitUsageRecordTask(c.Request.Context(), func(ctx context.Context) { + if err := h.gatewayService.RecordUsage(ctx, &service.RecordUsageInput{ + Result: result, + QuotaPlatform: quotaPlatform, + APIKey: apiKey, + User: apiKey.User, + Account: account, + Subscription: subscription, + InboundEndpoint: 
inboundEndpoint, + UpstreamEndpoint: upstreamEndpoint, + UserAgent: userAgent, + IPAddress: clientIP, + RequestPayloadHash: requestPayloadHash, + APIKeyService: h.apiKeyService, + ChannelUsageFields: channelMapping.ToUsageFields(reqModel, result.UpstreamModel), + }); err != nil { + logger.L().With( + zap.String("component", opts.Component), + zap.Int64("user_id", subject.UserID), + zap.Int64("api_key_id", apiKey.ID), + zap.Any("group_id", apiKey.GroupID), + zap.String("model", reqModel), + zap.Int64("account_id", account.ID), + ).Error("gemini_openai.record_usage_failed", zap.Error(err)) + } + }) + return + } +} diff --git a/backend/internal/handler/gemini_openai_compatible_handler_test.go b/backend/internal/handler/gemini_openai_compatible_handler_test.go new file mode 100644 index 00000000000..91cc9579284 --- /dev/null +++ b/backend/internal/handler/gemini_openai_compatible_handler_test.go @@ -0,0 +1,94 @@ +package handler + +import ( + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/Wei-Shaw/sub2api/internal/pkg/gemini" + servermiddleware "github.com/Wei-Shaw/sub2api/internal/server/middleware" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func TestGeminiOpenAICompatibleModelsUsesOpenAIShape(t *testing.T) { + got := geminiModelsToOpenAIModelList(gemini.FallbackModelsList()) + + require.Equal(t, "list", got.Object) + require.NotEmpty(t, got.Data) + require.Equal(t, "model", got.Data[0].Object) + require.Equal(t, "google", got.Data[0].OwnedBy) + require.NotContains(t, got.Data[0].ID, "models/") +} + +func TestGeminiOpenAICompatibleRejectsNonGeminiGroup(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodGet, "/v1beta/openai/models", nil) + groupID := int64(1) + c.Set(string(servermiddleware.ContextKeyAPIKey), &service.APIKey{ + GroupID: &groupID, + Group: 
&service.Group{Platform: service.PlatformOpenAI}, + }) + + ok := ensureGeminiOpenAICompatibleGroup(c) + + require.False(t, ok) + require.Equal(t, http.StatusBadRequest, w.Code) + require.Contains(t, w.Body.String(), "requires a Gemini group") +} + +func TestGeminiOpenAICompatibleUnsupportedUsesOpenAIErrorShape(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/videos", nil) + + (&GatewayHandler{}).GeminiOpenAICompatibleUnsupported(c) + + require.Equal(t, http.StatusNotFound, w.Code) + require.JSONEq(t, `{"error":{"type":"invalid_request_error","message":"Unsupported endpoint for Gemini OpenAI compatibility"}}`, w.Body.String()) +} + +func TestGeminiOpenAICompatibleEmbeddingsValidatesModel(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/embeddings", strings.NewReader(`{"input":"hello"}`)) + c.Request.Header.Set("Content-Type", "application/json") + groupID := int64(1) + c.Set(string(servermiddleware.ContextKeyAPIKey), &service.APIKey{ + ID: 10, + GroupID: &groupID, + Group: &service.Group{Platform: service.PlatformGemini}, + }) + c.Set(string(servermiddleware.ContextKeyUser), servermiddleware.AuthSubject{UserID: 20, Concurrency: 1}) + + (&GatewayHandler{}).GeminiOpenAICompatibleEmbeddings(c) + + require.Equal(t, http.StatusBadRequest, w.Code) + require.Contains(t, w.Body.String(), "model is required") +} + +func TestGeminiOpenAICompatibleImagesRequireGroupPermission(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/images/generations", strings.NewReader(`{"model":"gemini-2.5-flash-image","prompt":"draw"}`)) + c.Request.Header.Set("Content-Type", "application/json") + groupID := int64(1) + 
c.Set(string(servermiddleware.ContextKeyAPIKey), &service.APIKey{ + ID: 11, + GroupID: &groupID, + Group: &service.Group{Platform: service.PlatformGemini, AllowImageGeneration: false}, + }) + c.Set(string(servermiddleware.ContextKeyUser), servermiddleware.AuthSubject{UserID: 21, Concurrency: 1}) + + (&GatewayHandler{}).GeminiOpenAICompatibleImagesGenerations(c) + + require.Equal(t, http.StatusForbidden, w.Code) + require.Contains(t, w.Body.String(), service.ImageGenerationPermissionMessage()) +} diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go index 8997835c2aa..e916d0a6c76 100644 --- a/backend/internal/pkg/apicompat/anthropic_responses_test.go +++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go @@ -1256,6 +1256,49 @@ func TestResponsesToAnthropicRequest_ToolChoiceLegacyFunctionName(t *testing.T) assert.Equal(t, "get_weather", tc["name"]) } +func TestResponsesToAnthropicRequest_UserInputAudioBlock(t *testing.T) { + req := &ResponsesRequest{ + Model: "gemini-3-flash-preview", + Input: json.RawMessage(`[ + {"role":"user","content":[ + {"type":"input_text","text":"Listen to this"}, + {"type":"input_audio","input_audio":{"data":"UklGRg==","format":"wav"}} + ]} + ]`), + } + + resp, err := ResponsesToAnthropicRequest(req) + require.NoError(t, err) + require.Len(t, resp.Messages, 1) + + var blocks []map[string]any + require.NoError(t, json.Unmarshal(resp.Messages[0].Content, &blocks)) + require.Len(t, blocks, 2) + assert.Equal(t, "text", blocks[0]["type"]) + assert.Equal(t, "Listen to this", blocks[0]["text"]) + assert.Equal(t, "input_audio", blocks[1]["type"]) + source, ok := blocks[1]["source"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "base64", source["type"]) + assert.Equal(t, "audio/wav", source["media_type"]) + assert.Equal(t, "UklGRg==", source["data"]) +} + +func TestResponsesToAnthropicRequest_UserInputAudioInvalidFormat(t *testing.T) { + req := 
&ResponsesRequest{ + Model: "gemini-3-flash-preview", + Input: json.RawMessage(`[ + {"role":"user","content":[ + {"type":"input_audio","input_audio":{"data":"UklGRg==","format":"webm"}} + ]} + ]`), + } + + _, err := ResponsesToAnthropicRequest(req) + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported input_audio format") +} + // --------------------------------------------------------------------------- // Image content block conversion tests // --------------------------------------------------------------------------- diff --git a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go index b03b012fc7a..2018bf1ec41 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_responses_test.go +++ b/backend/internal/pkg/apicompat/chatcompletions_responses_test.go @@ -181,6 +181,71 @@ func TestChatCompletionsToResponses_ImageURL(t *testing.T) { assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL) } +func TestChatCompletionsToResponses_InputAudio(t *testing.T) { + content := `[{"type":"text","text":"Transcribe this"},{"type":"input_audio","input_audio":{"data":"UklGRg==","format":"wav"}}]` + req := &ChatCompletionsRequest{ + Model: "gemini-3-flash-preview", + Messages: []ChatMessage{ + {Role: "user", Content: json.RawMessage(content)}, + }, + } + + resp, err := ChatCompletionsToResponses(req) + require.NoError(t, err) + + var items []ResponsesInputItem + require.NoError(t, json.Unmarshal(resp.Input, &items)) + require.Len(t, items, 1) + + var parts []map[string]any + require.NoError(t, json.Unmarshal(items[0].Content, &parts)) + require.Len(t, parts, 2) + assert.Equal(t, "input_text", parts[0]["type"]) + assert.Equal(t, "Transcribe this", parts[0]["text"]) + assert.Equal(t, "input_audio", parts[1]["type"]) + inputAudio, ok := parts[1]["input_audio"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "UklGRg==", inputAudio["data"]) + assert.Equal(t, 
"wav", inputAudio["format"]) +} + +func TestChatCompletionsToResponses_InvalidInputAudioFormat(t *testing.T) { + content := `[{"type":"input_audio","input_audio":{"data":"UklGRg==","format":"webm"}}]` + req := &ChatCompletionsRequest{ + Model: "gemini-3-flash-preview", + Messages: []ChatMessage{ + {Role: "user", Content: json.RawMessage(content)}, + }, + } + + _, err := ChatCompletionsToResponses(req) + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported input_audio format") +} + +func TestChatCompletionsToResponses_EmptyInputAudioSkippedWhenTextExists(t *testing.T) { + content := `[{"type":"text","text":"Keep me"},{"type":"input_audio","input_audio":{"data":"","format":"wav"}}]` + req := &ChatCompletionsRequest{ + Model: "gemini-3-flash-preview", + Messages: []ChatMessage{ + {Role: "user", Content: json.RawMessage(content)}, + }, + } + + resp, err := ChatCompletionsToResponses(req) + require.NoError(t, err) + + var items []ResponsesInputItem + require.NoError(t, json.Unmarshal(resp.Input, &items)) + require.Len(t, items, 1) + + var parts []ResponsesContentPart + require.NoError(t, json.Unmarshal(items[0].Content, &parts)) + require.Len(t, parts, 1) + assert.Equal(t, "input_text", parts[0].Type) + assert.Equal(t, "Keep me", parts[0].Text) +} + func TestChatCompletionsToResponses_EmptyBase64ImageURLSkipped(t *testing.T) { content := `[{"type":"text","text":"Describe this"},{"type":"image_url","image_url":{"url":"data:image/png;base64,"}}]` req := &ChatCompletionsRequest{ diff --git a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go index 463bdd0d15d..5e4f8f8789a 100644 --- a/backend/internal/pkg/apicompat/chatcompletions_to_responses.go +++ b/backend/internal/pkg/apicompat/chatcompletions_to_responses.go @@ -342,7 +342,10 @@ func marshalChatInputContent(content chatMessageContent) (json.RawMessage, error if content.Text != nil { return json.Marshal(*content.Text) } - 
parts := convertChatContentPartsToResponses(content.Parts) + parts, err := convertChatContentPartsToResponses(content.Parts) + if err != nil { + return nil, err + } if len(parts) == 0 { // A nil slice marshals to JSON null, which the upstream Responses API // rejects ("expected an array of objects or string, but got null"). @@ -352,7 +355,7 @@ func marshalChatInputContent(content chatMessageContent) (json.RawMessage, error return json.Marshal(parts) } -func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesContentPart { +func convertChatContentPartsToResponses(parts []ChatContentPart) ([]ResponsesContentPart, error) { var responseParts []ResponsesContentPart for _, p := range parts { switch p.Type { @@ -370,9 +373,45 @@ func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesCont ImageURL: p.ImageURL.URL, }) } + case "input_audio": + if p.InputAudio == nil || strings.TrimSpace(p.InputAudio.Data) == "" { + continue + } + format := strings.ToLower(strings.TrimSpace(p.InputAudio.Format)) + if _, ok := OpenAIInputAudioFormatToMIMEType(format); !ok { + return nil, fmt.Errorf("unsupported input_audio format %q", p.InputAudio.Format) + } + responseParts = append(responseParts, ResponsesContentPart{ + Type: "input_audio", + InputAudio: &ResponsesInputAudio{ + Data: p.InputAudio.Data, + Format: format, + }, + }) } } - return responseParts + return responseParts, nil +} + +// OpenAIInputAudioFormatToMIMEType maps OpenAI-compatible input_audio format +// values to MIME types accepted by Gemini inlineData. 
+func OpenAIInputAudioFormatToMIMEType(format string) (string, bool) { + switch strings.ToLower(strings.TrimSpace(format)) { + case "wav": + return "audio/wav", true + case "mp3": + return "audio/mpeg", true + case "m4a": + return "audio/mp4", true + case "aac": + return "audio/aac", true + case "flac": + return "audio/flac", true + case "ogg": + return "audio/ogg", true + default: + return "", false + } } func isEmptyBase64DataURI(raw string) bool { diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go index 8fa652f2bd1..1c627d2acec 100644 --- a/backend/internal/pkg/apicompat/responses_to_anthropic_request.go +++ b/backend/internal/pkg/apicompat/responses_to_anthropic_request.go @@ -259,6 +259,22 @@ func convertResponsesUserToAnthropicContent(raw json.RawMessage) (json.RawMessag Source: src, }) } + case "input_audio": + if p.InputAudio == nil || strings.TrimSpace(p.InputAudio.Data) == "" { + continue + } + mediaType, ok := OpenAIInputAudioFormatToMIMEType(p.InputAudio.Format) + if !ok { + return nil, fmt.Errorf("unsupported input_audio format %q", p.InputAudio.Format) + } + blocks = append(blocks, AnthropicContentBlock{ + Type: "input_audio", + Source: &AnthropicImageSource{ + Type: "base64", + MediaType: mediaType, + Data: p.InputAudio.Data, + }, + }) } } diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go index b4451f235bb..3a1bd53fa22 100644 --- a/backend/internal/pkg/apicompat/types.go +++ b/backend/internal/pkg/apicompat/types.go @@ -241,9 +241,17 @@ type ResponsesInputItem struct { // ResponsesContentPart is a typed content part in a Responses message. 
type ResponsesContentPart struct { - Type string `json:"type"` // "input_text" | "output_text" | "input_image" - Text string `json:"text,omitempty"` - ImageURL string `json:"image_url,omitempty"` // data URI for input_image + Type string `json:"type"` // "input_text" | "output_text" | "input_image" | "input_audio" + Text string `json:"text,omitempty"` + ImageURL string `json:"image_url,omitempty"` // data URI for input_image + InputAudio *ResponsesInputAudio `json:"input_audio,omitempty"` +} + +// ResponsesInputAudio contains base64-encoded audio data in OpenAI-compatible +// shape. Format is the client-facing extension such as "wav" or "mp3". +type ResponsesInputAudio struct { + Data string `json:"data"` + Format string `json:"format"` } // ResponsesTool describes a tool in the Responses API. @@ -460,9 +468,10 @@ type ChatMessage struct { // ChatContentPart is a typed content part in a multi-modal message. type ChatContentPart struct { - Type string `json:"type"` // "text" | "image_url" - Text string `json:"text,omitempty"` - ImageURL *ChatImageURL `json:"image_url,omitempty"` + Type string `json:"type"` // "text" | "image_url" | "input_audio" + Text string `json:"text,omitempty"` + ImageURL *ChatImageURL `json:"image_url,omitempty"` + InputAudio *ChatInputAudio `json:"input_audio,omitempty"` } // ChatImageURL contains the URL for an image content part. @@ -471,6 +480,13 @@ type ChatImageURL struct { Detail string `json:"detail,omitempty"` // "auto" | "low" | "high" } +// ChatInputAudio contains base64-encoded audio data in OpenAI Chat Completions +// format. +type ChatInputAudio struct { + Data string `json:"data"` + Format string `json:"format"` +} + // ChatTool describes a tool available to the model. 
type ChatTool struct { Type string `json:"type"` // "function" diff --git a/backend/internal/server/routes/gateway.go b/backend/internal/server/routes/gateway.go index b039a6ecd48..318f253e37b 100644 --- a/backend/internal/server/routes/gateway.go +++ b/backend/internal/server/routes/gateway.go @@ -145,6 +145,25 @@ func RegisterGatewayRoutes( gemini.POST("/models/*modelAction", h.Gateway.GeminiV1BetaModels) } + // Gemini OpenAI-compatible API surface. + geminiOpenAI := r.Group("/v1beta/openai") + geminiOpenAI.Use(bodyLimit) + geminiOpenAI.Use(clientRequestID) + geminiOpenAI.Use(opsErrorLogger) + geminiOpenAI.Use(endpointNorm) + geminiOpenAI.Use(middleware.APIKeyAuthWithSubscriptionGoogle(apiKeyService, subscriptionService, cfg)) + geminiOpenAI.Use(requireGroupGoogle) + geminiOpenAI.Use(requireGeminiOpenAICompatibleGroup) + { + geminiOpenAI.GET("/models", h.Gateway.GeminiOpenAICompatibleModels) + geminiOpenAI.GET("/models/:model", h.Gateway.GeminiOpenAICompatibleGetModel) + geminiOpenAI.POST("/chat/completions", h.Gateway.ChatCompletions) + geminiOpenAI.POST("/embeddings", h.Gateway.GeminiOpenAICompatibleEmbeddings) + geminiOpenAI.POST("/images/generations", h.Gateway.GeminiOpenAICompatibleImagesGenerations) + geminiOpenAI.POST("/videos", h.Gateway.GeminiOpenAICompatibleUnsupported) + geminiOpenAI.GET("/videos/:id", h.Gateway.GeminiOpenAICompatibleUnsupported) + } + // OpenAI Responses API(不带v1前缀的别名)— auto-route based on group platform responsesHandler := func(c *gin.Context) { if getGroupPlatform(c) == service.PlatformOpenAI { @@ -254,3 +273,18 @@ func getGroupPlatform(c *gin.Context) string { } return apiKey.Group.Platform } + +func requireGeminiOpenAICompatibleGroup(c *gin.Context) { + apiKey, ok := middleware.GetAPIKeyFromContext(c) + if !ok || apiKey == nil || apiKey.Group == nil || apiKey.Group.Platform != service.PlatformGemini { + c.JSON(http.StatusBadRequest, gin.H{ + "error": gin.H{ + "type": "invalid_request_error", + "message": "The /v1beta/openai 
compatibility endpoint requires a Gemini group", + }, + }) + c.Abort() + return + } + c.Next() +} diff --git a/backend/internal/server/routes/gateway_test.go b/backend/internal/server/routes/gateway_test.go index 19ef568600c..21f0abd6899 100644 --- a/backend/internal/server/routes/gateway_test.go +++ b/backend/internal/server/routes/gateway_test.go @@ -77,3 +77,50 @@ func TestGatewayRoutesOpenAIImagesPathsAreRegistered(t *testing.T) { require.NotEqual(t, http.StatusNotFound, w.Code, "path=%s should hit OpenAI images handler", path) } } + +func TestGatewayRoutesGeminiOpenAICompatiblePathsAreRegistered(t *testing.T) { + router := newGatewayRoutesTestRouter() + + routes := map[string]bool{} + for _, route := range router.Routes() { + routes[route.Method+" "+route.Path] = true + } + + required := []string{ + http.MethodGet + " /v1beta/openai/models", + http.MethodGet + " /v1beta/openai/models/:model", + http.MethodPost + " /v1beta/openai/chat/completions", + http.MethodPost + " /v1beta/openai/embeddings", + http.MethodPost + " /v1beta/openai/images/generations", + http.MethodPost + " /v1beta/openai/videos", + http.MethodGet + " /v1beta/openai/videos/:id", + } + for _, key := range required { + require.True(t, routes[key], "route %s should be registered", key) + } +} + +func TestRequireGeminiOpenAICompatibleGroupRejectsNonGemini(t *testing.T) { + gin.SetMode(gin.TestMode) + router := gin.New() + router.Use(func(c *gin.Context) { + groupID := int64(1) + c.Set(string(servermiddleware.ContextKeyAPIKey), &service.APIKey{ + GroupID: &groupID, + Group: &service.Group{Platform: service.PlatformOpenAI}, + }) + c.Next() + }) + router.Use(requireGeminiOpenAICompatibleGroup) + router.POST("/v1beta/openai/chat/completions", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"ok": true}) + }) + + req := httptest.NewRequest(http.MethodPost, "/v1beta/openai/chat/completions", strings.NewReader(`{"model":"gpt-test"}`)) + req.Header.Set("Content-Type", "application/json") + w := 
httptest.NewRecorder() + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusBadRequest, w.Code) + require.Contains(t, w.Body.String(), "requires a Gemini group") +} diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go index 64f19b2ec75..0545c2d4afe 100644 --- a/backend/internal/service/gemini_messages_compat_service.go +++ b/backend/internal/service/gemini_messages_compat_service.go @@ -3212,6 +3212,21 @@ func convertClaudeMessagesToGeminiContents(messages any, toolUseIDToName map[str } } } + case "input_audio", "audio": + if src, ok := bm["source"].(map[string]any); ok { + if srcType, _ := src["type"].(string); srcType == "base64" { + mediaType, _ := src["media_type"].(string) + data, _ := src["data"].(string) + if mediaType != "" && data != "" { + parts = append(parts, map[string]any{ + "inlineData": map[string]any{ + "mimeType": mediaType, + "data": data, + }, + }) + } + } + } default: // best-effort: preserve unknown blocks as text if b, err := json.Marshal(bm); err == nil { diff --git a/backend/internal/service/gemini_messages_compat_service_test.go b/backend/internal/service/gemini_messages_compat_service_test.go index 79db633aae0..76458dd0dee 100644 --- a/backend/internal/service/gemini_messages_compat_service_test.go +++ b/backend/internal/service/gemini_messages_compat_service_test.go @@ -170,6 +170,97 @@ func TestGeminiForwardAsChatCompletions_StreamsOpenAIChunksFromGeminiSSE(t *test require.Contains(t, out, "data: [DONE]") } +func TestGeminiForwardAsChatCompletions_InputAudioSentAsInlineData(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{ + response: &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{"Content-Type": []string{"application/json"}}, + Body: io.NopCloser(strings.NewReader(`{ + "candidates":[{"content":{"parts":[{"text":"heard it"}]},"finishReason":"STOP"}], + 
"usageMetadata":{"promptTokenCount":4,"candidatesTokenCount":2} + }`)), + }, + } + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + account := &Account{ + ID: 103, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-3-flash-preview","messages":[{"role":"user","content":[{"type":"text","text":"classify"},{"type":"input_audio","input_audio":{"data":"UklGRg==","format":"wav"}}]}]}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/chat/completions", bytes.NewReader(body)) + + result, err := svc.ForwardAsChatCompletions(context.Background(), c, account, body) + require.NoError(t, err) + require.NotNil(t, result) + require.Equal(t, http.StatusOK, rec.Code) + + require.NotNil(t, httpStub.lastReq) + sentBody, err := io.ReadAll(httpStub.lastReq.Body) + require.NoError(t, err) + var sent map[string]any + require.NoError(t, json.Unmarshal(sentBody, &sent)) + contents, ok := sent["contents"].([]any) + require.True(t, ok) + require.Len(t, contents, 1) + firstContent, ok := contents[0].(map[string]any) + require.True(t, ok) + parts, ok := firstContent["parts"].([]any) + require.True(t, ok) + require.Len(t, parts, 2) + textPart, ok := parts[0].(map[string]any) + require.True(t, ok) + require.Equal(t, "classify", textPart["text"]) + audioPart, ok := parts[1].(map[string]any) + require.True(t, ok) + inlineData, ok := audioPart["inlineData"].(map[string]any) + require.True(t, ok) + require.Equal(t, "audio/wav", inlineData["mimeType"]) + require.Equal(t, "UklGRg==", inlineData["data"]) +} + +func TestConvertClaudeMessagesToGeminiGenerateContent_InputAudio(t *testing.T) { + body := []byte(`{ + "model":"gemini-3-flash-preview", + "messages":[{"role":"user","content":[ + {"type":"text","text":"classify"}, + 
{"type":"input_audio","source":{"type":"base64","media_type":"audio/wav","data":"UklGRg=="}} + ]}] + }`) + + got, err := convertClaudeMessagesToGeminiGenerateContent(body) + require.NoError(t, err) + + var sent map[string]any + require.NoError(t, json.Unmarshal(got, &sent)) + contents, ok := sent["contents"].([]any) + require.True(t, ok) + require.Len(t, contents, 1) + firstContent, ok := contents[0].(map[string]any) + require.True(t, ok) + parts, ok := firstContent["parts"].([]any) + require.True(t, ok) + require.Len(t, parts, 2) + audioPart, ok := parts[1].(map[string]any) + require.True(t, ok) + inlineData, ok := audioPart["inlineData"].(map[string]any) + require.True(t, ok) + require.Equal(t, "audio/wav", inlineData["mimeType"]) + require.Equal(t, "UklGRg==", inlineData["data"]) +} + // TestConvertClaudeToolsToGeminiTools_CustomType 测试custom类型工具转换 func TestConvertClaudeToolsToGeminiTools_CustomType(t *testing.T) { tests := []struct { diff --git a/backend/internal/service/gemini_openai_embeddings.go b/backend/internal/service/gemini_openai_embeddings.go new file mode 100644 index 00000000000..c444868df84 --- /dev/null +++ b/backend/internal/service/gemini_openai_embeddings.go @@ -0,0 +1,352 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/geminicli" + "github.com/gin-gonic/gin" +) + +type geminiOpenAIEmbeddingsRequest struct { + Model string `json:"model"` + Input json.RawMessage `json:"input"` +} + +type geminiOpenAIEmbeddingsResponse struct { + Object string `json:"object"` + Data []geminiOpenAIEmbedding `json:"data"` + Model string `json:"model"` + Usage geminiOpenAIEmbeddingsUsage `json:"usage"` +} + +type geminiOpenAIEmbedding struct { + Object string `json:"object"` + Embedding []float64 `json:"embedding"` + Index int `json:"index"` +} + +type geminiOpenAIEmbeddingsUsage struct { + PromptTokens int `json:"prompt_tokens"` + 
TotalTokens int `json:"total_tokens"` +} + +type geminiEmbeddingAPIResponse struct { + Embedding *struct { + Values []float64 `json:"values"` + } `json:"embedding,omitempty"` + Embeddings []struct { + Values []float64 `json:"values"` + } `json:"embeddings,omitempty"` + UsageMetadata struct { + PromptTokenCount int `json:"promptTokenCount"` + } `json:"usageMetadata,omitempty"` +} + +// ForwardOpenAICompatibleEmbeddings serves /v1beta/openai/embeddings through +// Gemini native embedContent and batchEmbedContents while returning OpenAI shape. +func (s *GeminiMessagesCompatService) ForwardOpenAICompatibleEmbeddings( + ctx context.Context, + c *gin.Context, + account *Account, + body []byte, +) (*ForwardResult, error) { + startTime := time.Now() + + var req geminiOpenAIEmbeddingsRequest + if err := json.Unmarshal(body, &req); err != nil { + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body") + return nil, err + } + originalModel := strings.TrimSpace(req.Model) + if originalModel == "" { + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadRequest, "invalid_request_error", "model is required") + return nil, errors.New("model is required") + } + inputs, err := parseGeminiOpenAIEmbeddingInputs(req.Input) + if err != nil { + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadRequest, "invalid_request_error", err.Error()) + return nil, err + } + + mappedModel := originalModel + if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount { + mappedModel = account.GetMappedModel(originalModel) + } + batch := len(inputs) > 1 + var upstreamBody []byte + if batch { + upstreamBody = buildGeminiBatchEmbedContentsRequest(mappedModel, inputs) + } else { + upstreamBody = buildGeminiEmbedContentRequest(inputs[0]) + } + + upstreamReq, requestIDHeader, err := s.buildGeminiOpenAIEmbeddingsRequest(ctx, account, mappedModel, batch, upstreamBody) + if err != nil { + if errors.Is(err, context.Canceled) || 
errors.Is(err, context.DeadlineExceeded) { + return nil, err + } + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadGateway, "upstream_error", err.Error()) + return nil, err + } + + proxyURL := "" + if account.ProxyID != nil && account.Proxy != nil { + proxyURL = account.Proxy.URL() + } + resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency) + if err != nil { + safeErr := sanitizeUpstreamErrorMessage(err.Error()) + setOpsUpstreamError(c, 0, safeErr, "") + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: 0, + Kind: "request_error", + Message: safeErr, + }) + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed") + return nil, fmt.Errorf("upstream request failed: %s", safeErr) + } + defer func() { _ = resp.Body.Close() }() + + requestID := resp.Header.Get(requestIDHeader) + if requestID == "" { + requestID = resp.Header.Get("x-goog-request-id") + } + if requestID != "" { + c.Header("x-request-id", requestID) + } + + if resp.StatusCode >= 400 { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20)) + s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: requestID, + Kind: "failover", + Message: upstreamMsg, + }) + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + } + if upstreamMsg == "" { + upstreamMsg = fmt.Sprintf("Gemini upstream error: %d", resp.StatusCode) + } + setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, "") + 
writeGeminiOpenAIEmbeddingsError(c, resp.StatusCode, "upstream_error", upstreamMsg) + return nil, fmt.Errorf("gemini upstream error: %d message=%s", resp.StatusCode, upstreamMsg) + } + + respBody, err := ReadUpstreamResponseBody(resp.Body, s.cfg, c, openAITooLargeError) + if err != nil { + if !errors.Is(err, ErrUpstreamResponseBodyTooLarge) { + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadGateway, "api_error", "Failed to read upstream response") + } + return nil, fmt.Errorf("read upstream body: %w", err) + } + + openAIBody, usage, err := convertGeminiEmbeddingResponseToOpenAI(respBody, originalModel, len(inputs)) + if err != nil { + writeGeminiOpenAIEmbeddingsError(c, http.StatusBadGateway, "api_error", "Failed to parse upstream response") + return nil, err + } + + if !c.Writer.Written() { + c.Data(http.StatusOK, "application/json", openAIBody) + } + + return &ForwardResult{ + RequestID: requestID, + Usage: usage, + Model: originalModel, + UpstreamModel: mappedModel, + Stream: false, + Duration: time.Since(startTime), + }, nil +} + +func (s *GeminiMessagesCompatService) buildGeminiOpenAIEmbeddingsRequest( + ctx context.Context, + account *Account, + model string, + batch bool, + body []byte, +) (*http.Request, string, error) { + baseURL := account.GetGeminiBaseURL(geminicli.AIStudioBaseURL) + normalizedBaseURL, err := s.validateUpstreamBaseURL(baseURL) + if err != nil { + return nil, "", err + } + targetURL := geminiEmbeddingURL(normalizedBaseURL, model, batch) + + upstreamCtx, releaseUpstreamCtx := detachUpstreamContext(ctx) + upstreamReq, err := http.NewRequestWithContext(upstreamCtx, http.MethodPost, targetURL, bytes.NewReader(body)) + releaseUpstreamCtx() + if err != nil { + return nil, "", err + } + upstreamReq.Header.Set("Content-Type", "application/json") + upstreamReq.Header.Set("Accept", "application/json") + + switch account.Type { + case AccountTypeAPIKey: + apiKey := strings.TrimSpace(account.GetCredential("api_key")) + if apiKey == "" { + return 
nil, "", errors.New("gemini api_key not configured") + } + upstreamReq.Header.Set("x-goog-api-key", apiKey) + case AccountTypeOAuth, AccountTypeServiceAccount: + if s.tokenProvider == nil { + return nil, "", errors.New("gemini token provider not configured") + } + accessToken, err := s.tokenProvider.GetAccessToken(ctx, account) + if err != nil { + return nil, "", err + } + upstreamReq.Header.Set("Authorization", "Bearer "+accessToken) + default: + return nil, "", fmt.Errorf("unsupported account type: %s", account.Type) + } + + return upstreamReq, "x-request-id", nil +} + +func parseGeminiOpenAIEmbeddingInputs(raw json.RawMessage) ([]string, error) { + raw = bytes.TrimSpace(raw) + if len(raw) == 0 || bytes.Equal(raw, []byte("null")) { + return nil, errors.New("input is required") + } + + var single string + if err := json.Unmarshal(raw, &single); err == nil { + if strings.TrimSpace(single) == "" { + return nil, errors.New("input cannot be empty") + } + return []string{single}, nil + } + + var many []string + if err := json.Unmarshal(raw, &many); err == nil { + if len(many) == 0 { + return nil, errors.New("input cannot be empty") + } + for _, input := range many { + if strings.TrimSpace(input) == "" { + return nil, errors.New("input cannot contain empty strings") + } + } + return many, nil + } + + var arr []json.RawMessage + if err := json.Unmarshal(raw, &arr); err == nil { + return nil, errors.New("token array inputs are not supported") + } + return nil, errors.New("input must be a string or array of strings") +} + +func buildGeminiEmbedContentRequest(input string) []byte { + body, _ := json.Marshal(map[string]any{ + "content": geminiEmbeddingContent(input), + }) + return body +} + +func buildGeminiBatchEmbedContentsRequest(model string, inputs []string) []byte { + requests := make([]map[string]any, 0, len(inputs)) + for _, input := range inputs { + requests = append(requests, map[string]any{ + "model": geminiEmbeddingBodyModel(model), + "content": 
geminiEmbeddingContent(input), + }) + } + body, _ := json.Marshal(map[string]any{"requests": requests}) + return body +} + +func geminiEmbeddingContent(input string) map[string]any { + return map[string]any{ + "parts": []map[string]string{{"text": input}}, + } +} + +func geminiEmbeddingBodyModel(model string) string { + model = strings.TrimSpace(model) + model = strings.TrimPrefix(model, "models/") + return "models/" + model +} + +func geminiEmbeddingURL(baseURL string, model string, batch bool) string { + action := "embedContent" + if batch { + action = "batchEmbedContents" + } + model = strings.TrimSpace(model) + model = strings.TrimPrefix(model, "models/") + return fmt.Sprintf("%s/v1beta/models/%s:%s", strings.TrimRight(baseURL, "/"), model, action) +} + +func convertGeminiEmbeddingResponseToOpenAI(body []byte, model string, inputCount int) ([]byte, ClaudeUsage, error) { + var parsed geminiEmbeddingAPIResponse + if err := json.Unmarshal(body, &parsed); err != nil { + return nil, ClaudeUsage{}, err + } + + vectors := make([][]float64, 0, inputCount) + if parsed.Embedding != nil { + vectors = append(vectors, parsed.Embedding.Values) + } + for _, embedding := range parsed.Embeddings { + vectors = append(vectors, embedding.Values) + } + if len(vectors) == 0 { + return nil, ClaudeUsage{}, errors.New("gemini embedding response did not include embeddings") + } + + data := make([]geminiOpenAIEmbedding, 0, len(vectors)) + for i, values := range vectors { + data = append(data, geminiOpenAIEmbedding{ + Object: "embedding", + Embedding: values, + Index: i, + }) + } + + promptTokens := parsed.UsageMetadata.PromptTokenCount + out := geminiOpenAIEmbeddingsResponse{ + Object: "list", + Data: data, + Model: model, + Usage: geminiOpenAIEmbeddingsUsage{ + PromptTokens: promptTokens, + TotalTokens: promptTokens, + }, + } + outBody, err := json.Marshal(out) + if err != nil { + return nil, ClaudeUsage{}, err + } + return outBody, ClaudeUsage{InputTokens: promptTokens}, nil +} + +func 
writeGeminiOpenAIEmbeddingsError(c *gin.Context, statusCode int, errType string, message string) { + c.JSON(statusCode, gin.H{ + "error": gin.H{ + "type": errType, + "message": message, + }, + }) +} diff --git a/backend/internal/service/gemini_openai_embeddings_test.go b/backend/internal/service/gemini_openai_embeddings_test.go new file mode 100644 index 00000000000..247c8c8ae17 --- /dev/null +++ b/backend/internal/service/gemini_openai_embeddings_test.go @@ -0,0 +1,184 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func TestGeminiForwardOpenAICompatibleEmbeddings_SingleInputUsesEmbedContent(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{ + response: &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{ + "Content-Type": []string{"application/json"}, + "X-Request-Id": []string{"gemini-req-1"}, + }, + Body: io.NopCloser(strings.NewReader(`{ + "embedding":{"values":[0.1,0.2,0.3]}, + "usageMetadata":{"promptTokenCount":7} + }`)), + }, + } + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + account := &Account{ + ID: 201, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-embedding-2-preview","input":"hello"}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/embeddings", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleEmbeddings(context.Background(), c, account, body) + require.NoError(t, err) + require.NotNil(t, result) + require.Equal(t, 7, result.Usage.InputTokens) + require.Equal(t, "gemini-embedding-2-preview", 
result.Model) + require.Equal(t, "gemini-req-1", result.RequestID) + + require.NotNil(t, httpStub.lastReq) + require.Contains(t, httpStub.lastReq.URL.String(), "/v1beta/models/gemini-embedding-2-preview:embedContent") + require.Equal(t, "gemini-api-key", httpStub.lastReq.Header.Get("x-goog-api-key")) + require.Empty(t, httpStub.lastReq.Header.Get("Authorization")) + + sentBody, err := io.ReadAll(httpStub.lastReq.Body) + require.NoError(t, err) + var sent map[string]any + require.NoError(t, json.Unmarshal(sentBody, &sent)) + content, ok := sent["content"].(map[string]any) + require.True(t, ok) + parts, ok := content["parts"].([]any) + require.True(t, ok) + require.Len(t, parts, 1) + part, ok := parts[0].(map[string]any) + require.True(t, ok) + require.Equal(t, "hello", part["text"]) + + require.Equal(t, http.StatusOK, rec.Code) + var got map[string]any + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &got)) + require.Equal(t, "list", got["object"]) + data, ok := got["data"].([]any) + require.True(t, ok) + require.Len(t, data, 1) + first, ok := data[0].(map[string]any) + require.True(t, ok) + require.Equal(t, "embedding", first["object"]) + require.Equal(t, float64(0), first["index"]) + require.Equal(t, "gemini-embedding-2-preview", got["model"]) + usage, ok := got["usage"].(map[string]any) + require.True(t, ok) + require.Equal(t, float64(7), usage["prompt_tokens"]) + require.Equal(t, float64(7), usage["total_tokens"]) +} + +func TestGeminiForwardOpenAICompatibleEmbeddings_BatchInputUsesBatchEmbedContents(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{ + response: &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{"Content-Type": []string{"application/json"}}, + Body: io.NopCloser(strings.NewReader(`{ + "embeddings":[{"values":[1,2]},{"values":[3,4]}], + "usageMetadata":{"promptTokenCount":11} + }`)), + }, + } + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + 
account := &Account{ + ID: 202, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-embedding-2-preview","input":["hello","world"]}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/embeddings", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleEmbeddings(context.Background(), c, account, body) + require.NoError(t, err) + require.NotNil(t, result) + require.Equal(t, 11, result.Usage.InputTokens) + + require.NotNil(t, httpStub.lastReq) + require.Contains(t, httpStub.lastReq.URL.String(), "/v1beta/models/gemini-embedding-2-preview:batchEmbedContents") + + sentBody, err := io.ReadAll(httpStub.lastReq.Body) + require.NoError(t, err) + var sent map[string]any + require.NoError(t, json.Unmarshal(sentBody, &sent)) + requests, ok := sent["requests"].([]any) + require.True(t, ok) + require.Len(t, requests, 2) + + var got map[string]any + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &got)) + data, ok := got["data"].([]any) + require.True(t, ok) + require.Len(t, data, 2) + second, ok := data[1].(map[string]any) + require.True(t, ok) + require.Equal(t, float64(1), second["index"]) + embedding, ok := second["embedding"].([]any) + require.True(t, ok) + require.Equal(t, []any{float64(3), float64(4)}, embedding) +} + +func TestGeminiForwardOpenAICompatibleEmbeddings_RejectsTokenArrayInput(t *testing.T) { + gin.SetMode(gin.TestMode) + + svc := &GeminiMessagesCompatService{ + httpUpstream: &geminiCompatHTTPUpstreamStub{}, + cfg: &config.Config{}, + } + account := &Account{ + ID: 203, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := 
[]byte(`{"model":"gemini-embedding-2-preview","input":[1,2,3]}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/embeddings", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleEmbeddings(context.Background(), c, account, body) + require.Error(t, err) + require.Nil(t, result) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "token array inputs are not supported") +} diff --git a/backend/internal/service/gemini_openai_images.go b/backend/internal/service/gemini_openai_images.go new file mode 100644 index 00000000000..280871f8433 --- /dev/null +++ b/backend/internal/service/gemini_openai_images.go @@ -0,0 +1,375 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/Wei-Shaw/sub2api/internal/pkg/geminicli" + "github.com/gin-gonic/gin" +) + +type geminiOpenAIImageGenerationRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + N *int `json:"n,omitempty"` + Size string `json:"size,omitempty"` + ResponseFormat string `json:"response_format,omitempty"` +} + +type geminiOpenAIImagesResponse struct { + Created int64 `json:"created"` + Data []geminiOpenAIImagesDataObject `json:"data"` +} + +type geminiOpenAIImagesDataObject struct { + B64JSON string `json:"b64_json"` + RevisedPrompt string `json:"revised_prompt,omitempty"` +} + +// ForwardOpenAICompatibleImagesGenerations serves +// /v1beta/openai/images/generations through Gemini native generateContent. 
+func (s *GeminiMessagesCompatService) ForwardOpenAICompatibleImagesGenerations( + ctx context.Context, + c *gin.Context, + account *Account, + body []byte, +) (*ForwardResult, error) { + startTime := time.Now() + + req, err := parseGeminiOpenAIImageGenerationRequest(body) + if err != nil { + writeGeminiOpenAIImagesError(c, http.StatusBadRequest, "invalid_request_error", err.Error()) + return nil, err + } + + originalModel := strings.TrimSpace(req.Model) + mappedModel := originalModel + if account.Type == AccountTypeAPIKey || account.Type == AccountTypeServiceAccount { + mappedModel = account.GetMappedModel(originalModel) + } + upstreamBody := buildGeminiImageGenerateContentRequest(req.Prompt, req.Size) + + upstreamReq, requestIDHeader, err := s.buildGeminiOpenAIImagesRequest(ctx, account, mappedModel, upstreamBody) + if err != nil { + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return nil, err + } + writeGeminiOpenAIImagesError(c, http.StatusBadGateway, "upstream_error", err.Error()) + return nil, err + } + + proxyURL := "" + if account.ProxyID != nil && account.Proxy != nil { + proxyURL = account.Proxy.URL() + } + resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency) + if err != nil { + safeErr := sanitizeUpstreamErrorMessage(err.Error()) + setOpsUpstreamError(c, 0, safeErr, "") + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: 0, + Kind: "request_error", + Message: safeErr, + }) + writeGeminiOpenAIImagesError(c, http.StatusBadGateway, "upstream_error", "Upstream request failed") + return nil, fmt.Errorf("upstream request failed: %s", safeErr) + } + defer func() { _ = resp.Body.Close() }() + + requestID := resp.Header.Get(requestIDHeader) + if requestID == "" { + requestID = resp.Header.Get("x-goog-request-id") + } + if requestID != "" { + c.Header("x-request-id", requestID) + } + + if 
resp.StatusCode >= 400 { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20)) + s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) + upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(respBody))) + if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { + appendOpsUpstreamError(c, OpsUpstreamErrorEvent{ + Platform: account.Platform, + AccountID: account.ID, + AccountName: account.Name, + UpstreamStatusCode: resp.StatusCode, + UpstreamRequestID: requestID, + Kind: "failover", + Message: upstreamMsg, + }) + return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody} + } + if upstreamMsg == "" { + upstreamMsg = fmt.Sprintf("Gemini upstream error: %d", resp.StatusCode) + } + setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, "") + writeGeminiOpenAIImagesError(c, resp.StatusCode, "upstream_error", upstreamMsg) + return nil, fmt.Errorf("gemini upstream error: %d message=%s", resp.StatusCode, upstreamMsg) + } + + respBody, err := ReadUpstreamResponseBody(resp.Body, s.cfg, c, openAITooLargeError) + if err != nil { + if !errors.Is(err, ErrUpstreamResponseBodyTooLarge) { + writeGeminiOpenAIImagesError(c, http.StatusBadGateway, "api_error", "Failed to read upstream response") + } + return nil, fmt.Errorf("read upstream body: %w", err) + } + + images, revisedPrompt, usage, err := collectGeminiOpenAIImages(respBody) + if err != nil { + writeGeminiOpenAIImagesError(c, http.StatusBadGateway, "api_error", "Failed to parse upstream response") + return nil, err + } + + if !c.Writer.Written() { + c.Data(http.StatusOK, "application/json", buildGeminiOpenAIImagesResponse(time.Now().Unix(), images, revisedPrompt)) + } + + imageInputSize := strings.TrimSpace(req.Size) + imageSize := normalizeOpenAIImageSizeTier(imageInputSize) + return &ForwardResult{ + RequestID: requestID, + Usage: usage, + Model: originalModel, + UpstreamModel: mappedModel, + Stream: false, + Duration: 
time.Since(startTime), + ImageCount: len(images), + ImageSize: imageSize, + ImageInputSize: imageInputSize, + }, nil +} + +func parseGeminiOpenAIImageGenerationRequest(body []byte) (*geminiOpenAIImageGenerationRequest, error) { + var req geminiOpenAIImageGenerationRequest + if err := json.Unmarshal(body, &req); err != nil { + return nil, errors.New("Failed to parse request body") + } + req.Model = strings.TrimSpace(req.Model) + req.Prompt = strings.TrimSpace(req.Prompt) + req.Size = strings.TrimSpace(req.Size) + req.ResponseFormat = strings.ToLower(strings.TrimSpace(req.ResponseFormat)) + if req.Model == "" { + return nil, errors.New("model is required") + } + if req.Prompt == "" { + return nil, errors.New("prompt is required") + } + if !isImageGenerationModel(req.Model) { + return nil, fmt.Errorf("images/generations requires an image generation model, got %q", req.Model) + } + switch req.ResponseFormat { + case "", "b64_json": + case "url": + return nil, errors.New("response_format=url is not supported") + default: + return nil, fmt.Errorf("unsupported response_format %q", req.ResponseFormat) + } + if req.N != nil { + switch { + case *req.N == 1: + case *req.N > 1: + return nil, errors.New("n greater than 1 is not supported") + default: + return nil, errors.New("n must be 1") + } + } + return &req, nil +} + +func buildGeminiImageGenerateContentRequest(prompt string, size string) []byte { + generationConfig := map[string]any{ + "responseModalities": []string{"TEXT", "IMAGE"}, + } + if strings.TrimSpace(size) != "" { + generationConfig["imageConfig"] = map[string]any{ + "imageSize": normalizeOpenAIImageSizeTier(size), + } + } + body, _ := json.Marshal(map[string]any{ + "contents": []map[string]any{ + { + "role": "user", + "parts": []map[string]string{ + {"text": prompt}, + }, + }, + }, + "generationConfig": generationConfig, + }) + return body +} + +func (s *GeminiMessagesCompatService) buildGeminiOpenAIImagesRequest( + ctx context.Context, + account *Account, + 
model string, + body []byte, +) (*http.Request, string, error) { + baseURL := account.GetGeminiBaseURL(geminicli.AIStudioBaseURL) + normalizedBaseURL, err := s.validateUpstreamBaseURL(baseURL) + if err != nil { + return nil, "", err + } + targetURL := geminiImageGenerateContentURL(normalizedBaseURL, model) + + upstreamCtx, releaseUpstreamCtx := detachUpstreamContext(ctx) + upstreamReq, err := http.NewRequestWithContext(upstreamCtx, http.MethodPost, targetURL, bytes.NewReader(body)) + releaseUpstreamCtx() + if err != nil { + return nil, "", err + } + upstreamReq.Header.Set("Content-Type", "application/json") + upstreamReq.Header.Set("Accept", "application/json") + + switch account.Type { + case AccountTypeAPIKey: + apiKey := strings.TrimSpace(account.GetCredential("api_key")) + if apiKey == "" { + return nil, "", errors.New("gemini api_key not configured") + } + upstreamReq.Header.Set("x-goog-api-key", apiKey) + case AccountTypeOAuth, AccountTypeServiceAccount: + if s.tokenProvider == nil { + return nil, "", errors.New("gemini token provider not configured") + } + accessToken, err := s.tokenProvider.GetAccessToken(ctx, account) + if err != nil { + return nil, "", err + } + upstreamReq.Header.Set("Authorization", "Bearer "+accessToken) + default: + return nil, "", fmt.Errorf("unsupported account type: %s", account.Type) + } + + return upstreamReq, "x-request-id", nil +} + +func geminiImageGenerateContentURL(baseURL string, model string) string { + model = strings.TrimSpace(model) + model = strings.TrimPrefix(model, "models/") + return fmt.Sprintf("%s/v1beta/models/%s:generateContent", strings.TrimRight(baseURL, "/"), model) +} + +func collectGeminiOpenAIImages(raw []byte) ([]string, string, ClaudeUsage, error) { + var payload map[string]any + if err := json.Unmarshal(raw, &payload); err != nil { + return nil, "", ClaudeUsage{}, err + } + + var images []string + revisedPrompt := "" + if candidates, ok := payload["candidates"].([]any); ok { + for _, candidate := range 
candidates { + cm, ok := candidate.(map[string]any) + if !ok { + continue + } + content, ok := cm["content"].(map[string]any) + if !ok { + continue + } + parts, ok := content["parts"].([]any) + if !ok { + continue + } + for _, part := range parts { + pm, ok := part.(map[string]any) + if !ok { + continue + } + if revisedPrompt == "" { + if text, _ := pm["text"].(string); strings.TrimSpace(text) != "" { + revisedPrompt = text + } + } + if data := geminiInlineImageData(pm["inlineData"]); data != "" { + images = append(images, data) + continue + } + if data := geminiInlineImageData(pm["inline_data"]); data != "" { + images = append(images, data) + } + } + } + } + if len(images) == 0 { + return nil, revisedPrompt, ClaudeUsage{}, errors.New("gemini image response did not include image data") + } + + usage := ClaudeUsage{} + if usageMetadata, ok := payload["usageMetadata"].(map[string]any); ok { + usage.InputTokens = intNumberFromAny(usageMetadata["promptTokenCount"]) + usage.OutputTokens = intNumberFromAny(usageMetadata["candidatesTokenCount"]) + usage.ImageOutputTokens = intNumberFromAny(usageMetadata["imageOutputTokenCount"]) + } + return images, revisedPrompt, usage, nil +} + +func geminiInlineImageData(v any) string { + inline, ok := v.(map[string]any) + if !ok { + return "" + } + data, _ := inline["data"].(string) + if strings.TrimSpace(data) == "" { + return "" + } + if mimeType, _ := inline["mimeType"].(string); mimeType != "" && !strings.HasPrefix(strings.ToLower(mimeType), "image/") { + return "" + } + if mimeType, _ := inline["mime_type"].(string); mimeType != "" && !strings.HasPrefix(strings.ToLower(mimeType), "image/") { + return "" + } + return data +} + +func intNumberFromAny(v any) int { + switch n := v.(type) { + case float64: + return int(n) + case int: + return n + case json.Number: + i, _ := n.Int64() + return int(i) + default: + return 0 + } +} + +func buildGeminiOpenAIImagesResponse(created int64, images []string, revisedPrompt string) []byte { + 
data := make([]geminiOpenAIImagesDataObject, 0, len(images)) + for _, image := range images { + item := geminiOpenAIImagesDataObject{B64JSON: image} + if strings.TrimSpace(revisedPrompt) != "" { + item.RevisedPrompt = revisedPrompt + } + data = append(data, item) + } + body, _ := json.Marshal(geminiOpenAIImagesResponse{ + Created: created, + Data: data, + }) + return body +} + +func writeGeminiOpenAIImagesError(c *gin.Context, statusCode int, errType string, message string) { + c.JSON(statusCode, gin.H{ + "error": gin.H{ + "type": errType, + "message": message, + }, + }) +} diff --git a/backend/internal/service/gemini_openai_images_test.go b/backend/internal/service/gemini_openai_images_test.go new file mode 100644 index 00000000000..6ca02d35996 --- /dev/null +++ b/backend/internal/service/gemini_openai_images_test.go @@ -0,0 +1,193 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/Wei-Shaw/sub2api/internal/config" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func TestGeminiForwardOpenAICompatibleImagesGenerations_ReturnsB64JSON(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{ + response: &http.Response{ + StatusCode: http.StatusOK, + Header: http.Header{ + "Content-Type": []string{"application/json"}, + "X-Request-Id": []string{"gemini-img-1"}, + }, + Body: io.NopCloser(strings.NewReader(`{ + "candidates":[{"content":{"parts":[ + {"text":"done"}, + {"inlineData":{"mimeType":"image/png","data":"iVBORw0KGgo="}} + ]}}], + "usageMetadata":{"promptTokenCount":10,"candidatesTokenCount":2} + }`)), + }, + } + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + account := &Account{ + ID: 301, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := 
httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-2.5-flash-image","prompt":"draw a cat","size":"1024x1024","response_format":"b64_json"}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/images/generations", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleImagesGenerations(context.Background(), c, account, body) + require.NoError(t, err) + require.NotNil(t, result) + require.Equal(t, "gemini-img-1", result.RequestID) + require.Equal(t, 10, result.Usage.InputTokens) + require.Equal(t, 2, result.Usage.OutputTokens) + require.Equal(t, 1, result.ImageCount) + require.Equal(t, "1024x1024", result.ImageInputSize) + + require.NotNil(t, httpStub.lastReq) + require.Contains(t, httpStub.lastReq.URL.String(), "/v1beta/models/gemini-2.5-flash-image:generateContent") + require.Equal(t, "gemini-api-key", httpStub.lastReq.Header.Get("x-goog-api-key")) + sentBody, err := io.ReadAll(httpStub.lastReq.Body) + require.NoError(t, err) + var sent map[string]any + require.NoError(t, json.Unmarshal(sentBody, &sent)) + contents, ok := sent["contents"].([]any) + require.True(t, ok) + require.Len(t, contents, 1) + content, ok := contents[0].(map[string]any) + require.True(t, ok) + parts, ok := content["parts"].([]any) + require.True(t, ok) + require.Len(t, parts, 1) + part, ok := parts[0].(map[string]any) + require.True(t, ok) + require.Equal(t, "draw a cat", part["text"]) + generationConfig, ok := sent["generationConfig"].(map[string]any) + require.True(t, ok) + modalities, ok := generationConfig["responseModalities"].([]any) + require.True(t, ok) + require.Contains(t, modalities, "TEXT") + require.Contains(t, modalities, "IMAGE") + + require.Equal(t, http.StatusOK, rec.Code) + var got map[string]any + require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &got)) + data, ok := got["data"].([]any) + require.True(t, ok) + require.Len(t, data, 1) + first, ok := data[0].(map[string]any) + require.True(t, ok) + 
require.Equal(t, "iVBORw0KGgo=", first["b64_json"]) + require.NotContains(t, first, "url") +} + +func TestGeminiForwardOpenAICompatibleImagesGenerations_RejectsURLResponseFormat(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{} + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + account := &Account{ + ID: 302, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-2.5-flash-image","prompt":"draw a cat","response_format":"url"}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/images/generations", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleImagesGenerations(context.Background(), c, account, body) + require.Error(t, err) + require.Nil(t, result) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "response_format=url is not supported") + require.Zero(t, httpStub.calls) +} + +func TestGeminiForwardOpenAICompatibleImagesGenerations_RejectsMultipleImages(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{} + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + account := &Account{ + ID: 303, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-2.5-flash-image","prompt":"draw a cat","n":2}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/images/generations", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleImagesGenerations(context.Background(), c, account, body) + require.Error(t, err) + 
require.Nil(t, result) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "n greater than 1 is not supported") + require.Zero(t, httpStub.calls) +} + +func TestGeminiForwardOpenAICompatibleImagesGenerations_RequiresImageModel(t *testing.T) { + gin.SetMode(gin.TestMode) + + httpStub := &geminiCompatHTTPUpstreamStub{} + svc := &GeminiMessagesCompatService{ + httpUpstream: httpStub, + cfg: &config.Config{}, + } + account := &Account{ + ID: 304, + Platform: PlatformGemini, + Type: AccountTypeAPIKey, + Credentials: map[string]any{ + "api_key": "gemini-api-key", + }, + Concurrency: 1, + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + body := []byte(`{"model":"gemini-2.5-flash","prompt":"draw a cat"}`) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/images/generations", bytes.NewReader(body)) + + result, err := svc.ForwardOpenAICompatibleImagesGenerations(context.Background(), c, account, body) + require.Error(t, err) + require.Nil(t, result) + require.Equal(t, http.StatusBadRequest, rec.Code) + require.Contains(t, rec.Body.String(), "requires an image generation model") + require.Zero(t, httpStub.calls) +} diff --git a/backend/internal/service/pricing_service_test.go b/backend/internal/service/pricing_service_test.go index cc8b120a1aa..f4252f95404 100644 --- a/backend/internal/service/pricing_service_test.go +++ b/backend/internal/service/pricing_service_test.go @@ -124,9 +124,9 @@ func TestDefaultPricingIncludesCodexAutoReview(t *testing.T) { got := svc.GetModelPricing("codex-auto-review") require.NotNil(t, got) - require.InDelta(t, 2.5e-6, got.InputCostPerToken, 1e-12) - require.InDelta(t, 1.5e-5, got.OutputCostPerToken, 1e-12) - require.InDelta(t, 2.5e-7, got.CacheReadInputTokenCost, 1e-12) + require.InDelta(t, 5e-6, got.InputCostPerToken, 1e-12) + require.InDelta(t, 3e-5, got.OutputCostPerToken, 1e-12) + require.InDelta(t, 5e-7, got.CacheReadInputTokenCost, 1e-12) } func 
TestGetModelPricing_Gpt54MiniUsesDedicatedStaticFallbackWhenRemoteMissing(t *testing.T) { diff --git a/docs/superpowers/plans/2026-05-29-gemini-openai-compatible-implementation.md b/docs/superpowers/plans/2026-05-29-gemini-openai-compatible-implementation.md new file mode 100644 index 00000000000..6847f241e2b --- /dev/null +++ b/docs/superpowers/plans/2026-05-29-gemini-openai-compatible-implementation.md @@ -0,0 +1,1158 @@ +# Gemini OpenAI-Compatible Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a Gemini-only OpenAI-compatible `/v1beta/openai` surface covering models, chat completions with audio input, embeddings, image generations, explicit unsupported endpoints, and frontend key-usage guidance. + +**Architecture:** Register a dedicated `/v1beta/openai` route group that uses Google-style API key extraction but enforces `gemini` group platform before dispatch. Chat completions reuses the existing `GatewayHandler.ChatCompletions` and `GeminiMessagesCompatService.ForwardAsChatCompletions` path; audio is preserved through the existing Chat Completions to Responses to Anthropic to Gemini bridge. Embeddings and image generation use new Gemini-native service methods that emit OpenAI-compatible responses while preserving sub2api account selection, billing, usage recording, and failover loops in `GatewayHandler`. + +**Tech Stack:** Go, Gin, existing sub2api handler/service layers, Gemini native REST `generateContent`, `embedContent`, `batchEmbedContents`, Vue 3, Vitest. 
+ +--- + +## Reference Checks + +- Google Gemini OpenAI compatibility documents `base_url="https://generativelanguage.googleapis.com/v1beta/openai/"`, `POST /v1beta/openai/chat/completions`, `POST /v1beta/openai/embeddings`, audio understanding through Chat Completions, `GET /v1beta/openai/models`, and video endpoints. +- Google Gemini native embeddings uses `POST /v1beta/{model=models/*}:embedContent` and `POST /v1beta/{model=models/*}:batchEmbedContents`. +- Google Gemini image generation uses `POST /v1beta/models/gemini-2.5-flash-image:generateContent` and returns image bytes in response parts `inlineData`. + +## File Structure + +- Modify `backend/internal/server/routes/gateway.go` + - Add `/v1beta/openai` route group. + - Add `requireGeminiOpenAICompatibleGroup` route middleware that rejects non-Gemini groups with OpenAI-style JSON before any `/v1beta/openai` handler runs, including chat completions. + - Add route registration tests by inspecting Gin route metadata. +- Create `backend/internal/handler/gemini_openai_compatible_handler.go` + - Model list and model retrieve handlers. + - OpenAI-style unsupported endpoint response helper. + - Gemini OpenAI embeddings and image handlers that reuse shared Gemini account-selection and usage-recording helpers. +- Create `backend/internal/handler/gemini_openai_compatible_handler_test.go` + - Unit tests for model conversion, platform guard, unsupported errors, embeddings/image handler validation. +- Modify `backend/internal/pkg/apicompat/types.go` + - Add `ChatInputAudio` and `ResponsesInputAudio`. + - Add `InputAudio` to `ChatContentPart` and `ResponsesContentPart`. +- Modify `backend/internal/pkg/apicompat/chatcompletions_to_responses.go` + - Preserve `input_audio` parts and validate data/format. +- Modify `backend/internal/pkg/apicompat/responses_to_anthropic_request.go` + - Preserve internal `input_audio` parts as Anthropic-style content blocks with `source`. 
+- Add or modify `backend/internal/pkg/apicompat/chatcompletions_to_responses_test.go` + - Coverage for preserving audio and rejecting invalid audio. +- Add or modify `backend/internal/pkg/apicompat/responses_to_anthropic_request_test.go` + - Coverage for converting Responses audio into Anthropic blocks. +- Modify `backend/internal/service/gemini_messages_compat_service.go` + - Convert audio blocks into Gemini `inlineData`. +- Modify `backend/internal/service/gemini_messages_compat_service_test.go` + - Full-chain audio test through `ForwardAsChatCompletions`. +- Create `backend/internal/service/gemini_openai_embeddings.go` + - Gemini-native embeddings request conversion, upstream call, OpenAI-compatible response conversion, usage extraction. +- Create `backend/internal/service/gemini_openai_embeddings_test.go` + - Unit tests for single and batch embeddings conversion. +- Create `backend/internal/service/gemini_openai_images.go` + - Gemini-native image-generation request conversion, upstream call, OpenAI-compatible response conversion, usage and image billing fields. +- Create `backend/internal/service/gemini_openai_images_test.go` + - Unit tests for generated image response and validation failures. +- Modify `backend/internal/handler/endpoint.go` and `backend/internal/handler/endpoint_test.go` + - Normalize `/v1beta/openai/embeddings` and `/v1beta/openai/images/generations` to the existing endpoint classes for ops logging. +- Modify `frontend/src/components/keys/UseKeyModal.vue` + - Add Gemini OpenAI-compatible tab. + - Correct native Gemini CLI base URL to `/v1beta`. + - Add OpenAI SDK Python example and env var snippets for `/v1beta/openai`. +- Modify `frontend/src/components/keys/__tests__/UseKeyModal.spec.ts` + - Test native Gemini CLI `/v1beta` and OpenAI-compatible Gemini `/v1beta/openai`. +- Modify `frontend/src/i18n/locales/zh.ts` and `frontend/src/i18n/locales/en.ts` + - Add tab label and copy for OpenAI-compatible Gemini guidance. 
+ +--- + +### Task 1: Route Group, Platform Guard, Models, Unsupported Errors + +**Files:** +- Modify: `backend/internal/server/routes/gateway.go` +- Create: `backend/internal/handler/gemini_openai_compatible_handler.go` +- Create: `backend/internal/handler/gemini_openai_compatible_handler_test.go` +- Test: `backend/internal/server/routes/gateway_test.go` + +- [ ] **Step 1: Write route registration tests** + +Add this test to `backend/internal/server/routes/gateway_test.go`: + +```go +func TestGatewayRoutesGeminiOpenAICompatiblePathsAreRegistered(t *testing.T) { + router := newGatewayRoutesTestRouter() + + routes := map[string]bool{} + for _, route := range router.Routes() { + routes[route.Method+" "+route.Path] = true + } + + required := []string{ + http.MethodGet + " /v1beta/openai/models", + http.MethodGet + " /v1beta/openai/models/:model", + http.MethodPost + " /v1beta/openai/chat/completions", + http.MethodPost + " /v1beta/openai/embeddings", + http.MethodPost + " /v1beta/openai/images/generations", + http.MethodPost + " /v1beta/openai/videos", + http.MethodGet + " /v1beta/openai/videos/:id", + } + for _, key := range required { + require.True(t, routes[key], "route %s should be registered", key) + } +} +``` + +Also add a middleware unit test in the same file: + +```go +func TestRequireGeminiOpenAICompatibleGroupRejectsNonGemini(t *testing.T) { + gin.SetMode(gin.TestMode) + router := gin.New() + router.Use(func(c *gin.Context) { + groupID := int64(1) + c.Set(string(servermiddleware.ContextKeyAPIKey), &service.APIKey{ + GroupID: &groupID, + Group: &service.Group{Platform: service.PlatformOpenAI}, + }) + c.Next() + }) + router.Use(requireGeminiOpenAICompatibleGroup) + router.POST("/v1beta/openai/chat/completions", func(c *gin.Context) { + c.JSON(http.StatusOK, gin.H{"ok": true}) + }) + + req := httptest.NewRequest(http.MethodPost, "/v1beta/openai/chat/completions", strings.NewReader(`{"model":"gpt-test"}`)) + req.Header.Set("Content-Type", "application/json") + 
w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + require.Equal(t, http.StatusBadRequest, w.Code) + require.Contains(t, w.Body.String(), "requires a Gemini group") +} +``` + +- [ ] **Step 2: Write handler tests for model conversion and platform guard** + +Create `backend/internal/handler/gemini_openai_compatible_handler_test.go`: + +```go +package handler + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/Wei-Shaw/sub2api/internal/pkg/gemini" + servermiddleware "github.com/Wei-Shaw/sub2api/internal/server/middleware" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func TestGeminiOpenAICompatibleModelsUsesOpenAIShape(t *testing.T) { + got := geminiModelsToOpenAIModelList(gemini.FallbackModelsList()) + + require.Equal(t, "list", got.Object) + require.NotEmpty(t, got.Data) + require.Equal(t, "model", got.Data[0].Object) + require.Equal(t, "google", got.Data[0].OwnedBy) + require.NotContains(t, got.Data[0].ID, "models/") +} + +func TestGeminiOpenAICompatibleRejectsNonGeminiGroup(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodGet, "/v1beta/openai/models", nil) + groupID := int64(1) + c.Set(string(servermiddleware.ContextKeyAPIKey), &service.APIKey{ + GroupID: &groupID, + Group: &service.Group{Platform: service.PlatformOpenAI}, + }) + + ok := ensureGeminiOpenAICompatibleGroup(c) + + require.False(t, ok) + require.Equal(t, http.StatusBadRequest, w.Code) + require.Contains(t, w.Body.String(), "requires a Gemini group") +} + +func TestGeminiOpenAICompatibleUnsupportedUsesOpenAIErrorShape(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest(http.MethodPost, "/v1beta/openai/videos", nil) + + (&GatewayHandler{}).GeminiOpenAICompatibleUnsupported(c) + + require.Equal(t, 
http.StatusNotFound, w.Code) + require.JSONEq(t, `{"error":{"type":"invalid_request_error","message":"Unsupported endpoint for Gemini OpenAI compatibility"}}`, w.Body.String()) +} +``` + +- [ ] **Step 3: Run tests and verify they fail** + +Run: + +```bash +cd backend && go test ./internal/server/routes ./internal/handler -run 'GeminiOpenAICompatible|GatewayRoutesGeminiOpenAI' -count=1 +``` + +Expected: FAIL with missing route paths and missing symbols such as `geminiModelsToOpenAIModelList`. + +- [ ] **Step 4: Implement handlers and route group** + +In `backend/internal/handler/gemini_openai_compatible_handler.go`, add: + +```go +package handler + +import ( + "net/http" + "strings" + + "github.com/Wei-Shaw/sub2api/internal/pkg/gemini" + servermiddleware "github.com/Wei-Shaw/sub2api/internal/server/middleware" + "github.com/Wei-Shaw/sub2api/internal/service" + "github.com/gin-gonic/gin" +) + +type openAICompatModelList struct { + Object string `json:"object"` + Data []openAICompatModelObject `json:"data"` +} + +type openAICompatModelObject struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` +} + +func ensureGeminiOpenAICompatibleGroup(c *gin.Context) bool { + apiKey, ok := servermiddleware.GetAPIKeyFromContext(c) + if !ok || apiKey == nil || apiKey.Group == nil || apiKey.Group.Platform != service.PlatformGemini { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "The /v1beta/openai compatibility endpoint requires a Gemini group") + return false + } + return true +} + +func geminiOpenAICompatError(c *gin.Context, status int, errType string, message string) { + c.JSON(status, gin.H{ + "error": gin.H{ + "type": errType, + "message": message, + }, + }) + c.Abort() +} + +func geminiModelNameToOpenAIID(name string) string { + name = strings.TrimSpace(name) + name = strings.TrimPrefix(name, "models/") + return name +} + +func geminiModelsToOpenAIModelList(src 
gemini.ModelsListResponse) openAICompatModelList { + out := openAICompatModelList{Object: "list", Data: make([]openAICompatModelObject, 0, len(src.Models))} + for _, model := range src.Models { + id := geminiModelNameToOpenAIID(model.Name) + if id == "" { + continue + } + out.Data = append(out.Data, openAICompatModelObject{ + ID: id, + Object: "model", + Created: 0, + OwnedBy: "google", + }) + } + return out +} + +func geminiModelToOpenAIModelObject(model string) openAICompatModelObject { + return openAICompatModelObject{ + ID: geminiModelNameToOpenAIID(model), + Object: "model", + Created: 0, + OwnedBy: "google", + } +} + +func (h *GatewayHandler) GeminiOpenAICompatibleModels(c *gin.Context) { + if !ensureGeminiOpenAICompatibleGroup(c) { + return + } + c.JSON(http.StatusOK, geminiModelsToOpenAIModelList(gemini.FallbackModelsList())) +} + +func (h *GatewayHandler) GeminiOpenAICompatibleGetModel(c *gin.Context) { + if !ensureGeminiOpenAICompatibleGroup(c) { + return + } + model := strings.TrimSpace(c.Param("model")) + if model == "" { + geminiOpenAICompatError(c, http.StatusBadRequest, "invalid_request_error", "model is required") + return + } + c.JSON(http.StatusOK, geminiModelToOpenAIModelObject(model)) +} + +func (h *GatewayHandler) GeminiOpenAICompatibleUnsupported(c *gin.Context) { + geminiOpenAICompatError(c, http.StatusNotFound, "invalid_request_error", "Unsupported endpoint for Gemini OpenAI compatibility") +} +``` + +In `backend/internal/server/routes/gateway.go`, add this package-level helper near `getGroupPlatform`: + +```go +func requireGeminiOpenAICompatibleGroup(c *gin.Context) { + apiKey, ok := middleware.GetAPIKeyFromContext(c) + if !ok || apiKey == nil || apiKey.Group == nil || apiKey.Group.Platform != service.PlatformGemini { + c.JSON(http.StatusBadRequest, gin.H{ + "error": gin.H{ + "type": "invalid_request_error", + "message": "The /v1beta/openai compatibility endpoint requires a Gemini group", + }, + }) + c.Abort() + return + } + c.Next() +} 
+``` + +Then add the group after the existing native `/v1beta` Gemini group: + +```go + + geminiOpenAI := r.Group("/v1beta/openai") + geminiOpenAI.Use(bodyLimit) + geminiOpenAI.Use(clientRequestID) + geminiOpenAI.Use(opsErrorLogger) + geminiOpenAI.Use(endpointNorm) + geminiOpenAI.Use(middleware.APIKeyAuthWithSubscriptionGoogle(apiKeyService, subscriptionService, cfg)) + geminiOpenAI.Use(requireGroupGoogle) + geminiOpenAI.Use(requireGeminiOpenAICompatibleGroup) + { + geminiOpenAI.GET("/models", h.Gateway.GeminiOpenAICompatibleModels) + geminiOpenAI.GET("/models/:model", h.Gateway.GeminiOpenAICompatibleGetModel) + geminiOpenAI.POST("/chat/completions", h.Gateway.ChatCompletions) + geminiOpenAI.POST("/embeddings", h.Gateway.GeminiOpenAICompatibleEmbeddings) + geminiOpenAI.POST("/images/generations", h.Gateway.GeminiOpenAICompatibleImagesGenerations) + geminiOpenAI.POST("/videos", h.Gateway.GeminiOpenAICompatibleUnsupported) + geminiOpenAI.GET("/videos/:id", h.Gateway.GeminiOpenAICompatibleUnsupported) + } +``` + +- [ ] **Step 5: Run tests and commit** + +Run: + +```bash +cd backend && go test ./internal/server/routes ./internal/handler -run 'GeminiOpenAICompatible|GatewayRoutesGeminiOpenAI' -count=1 +``` + +Expected: PASS. 
+ +Commit: + +```bash +git add backend/internal/server/routes/gateway.go backend/internal/server/routes/gateway_test.go backend/internal/handler/gemini_openai_compatible_handler.go backend/internal/handler/gemini_openai_compatible_handler_test.go +git commit -m "feat: add gemini openai compatibility routes" +``` + +--- + +### Task 2: Chat Completions `input_audio` Preservation + +**Files:** +- Modify: `backend/internal/pkg/apicompat/types.go` +- Modify: `backend/internal/pkg/apicompat/chatcompletions_to_responses.go` +- Modify: `backend/internal/pkg/apicompat/responses_to_anthropic_request.go` +- Modify: `backend/internal/service/gemini_messages_compat_service.go` +- Test: `backend/internal/pkg/apicompat/*_test.go` +- Test: `backend/internal/service/gemini_messages_compat_service_test.go` + +- [ ] **Step 1: Write failing apicompat tests** + +Add tests that assert: + +```go +func TestChatCompletionsToResponsesPreservesInputAudio(t *testing.T) { + req := &ChatCompletionsRequest{ + Model: "gemini-3.5-flash", + Messages: []ChatMessage{{ + Role: "user", + Content: json.RawMessage(`[{"type":"text","text":"Transcribe this."},{"type":"input_audio","input_audio":{"data":"UklGRg==","format":"wav"}}]`), + }}, + } + + out, err := ChatCompletionsToResponses(req) + require.NoError(t, err) + + var items []ResponsesInputItem + require.NoError(t, json.Unmarshal(out.Input, &items)) + var parts []ResponsesContentPart + require.NoError(t, json.Unmarshal(items[0].Content, &parts)) + require.Equal(t, "input_audio", parts[1].Type) + require.Equal(t, "UklGRg==", parts[1].InputAudio.Data) + require.Equal(t, "wav", parts[1].InputAudio.Format) +} + +func TestChatCompletionsToResponsesRejectsUnsupportedInputAudioFormat(t *testing.T) { + req := &ChatCompletionsRequest{ + Model: "gemini-3.5-flash", + Messages: []ChatMessage{{ + Role: "user", + Content: json.RawMessage(`[{"type":"input_audio","input_audio":{"data":"abc","format":"bad"}}]`), + }}, + } + + _, err := ChatCompletionsToResponses(req) 
+ require.ErrorContains(t, err, "unsupported input_audio format") +} +``` + +Add a Responses to Anthropic test: + +```go +func TestResponsesToAnthropicRequestPreservesInputAudio(t *testing.T) { + content, _ := json.Marshal([]ResponsesContentPart{{ + Type: "input_audio", + InputAudio: &ResponsesInputAudio{Data: "UklGRg==", Format: "wav"}, + }}) + input, _ := json.Marshal([]ResponsesInputItem{{Role: "user", Content: content}}) + + out, err := ResponsesToAnthropicRequest(&ResponsesRequest{Model: "gemini-3.5-flash", Input: input}) + require.NoError(t, err) + require.Len(t, out.Messages, 1) + + var blocks []AnthropicContentBlock + require.NoError(t, json.Unmarshal(out.Messages[0].Content, &blocks)) + require.Equal(t, "input_audio", blocks[0].Type) + require.NotNil(t, blocks[0].Source) + require.Equal(t, "audio/wav", blocks[0].Source.MediaType) + require.Equal(t, "UklGRg==", blocks[0].Source.Data) +} +``` + +- [ ] **Step 2: Write failing Gemini conversion tests** + +Add to `backend/internal/service/gemini_messages_compat_service_test.go`: + +```go +func TestConvertClaudeMessagesToGeminiGenerateContent_InputAudioToInlineData(t *testing.T) { + claudeReq := map[string]any{ + "model": "gemini-3.5-flash", + "max_tokens": 128, + "messages": []any{map[string]any{ + "role": "user", + "content": []any{map[string]any{ + "type": "input_audio", + "source": map[string]any{ + "type": "base64", + "media_type": "audio/wav", + "data": "UklGRg==", + }, + }}, + }}, + } + body, _ := json.Marshal(claudeReq) + + got, err := convertClaudeMessagesToGeminiGenerateContent(body) + require.NoError(t, err) + require.JSONEq(t, `{"contents":[{"role":"user","parts":[{"inlineData":{"mimeType":"audio/wav","data":"UklGRg=="}}]}],"generationConfig":{"maxOutputTokens":128}}`, string(got)) +} +``` + +Add a full-chain test that sends Chat Completions `input_audio` through `ForwardAsChatCompletions` and inspects `httpStub.lastReq.Body` for `inlineData.mimeType == "audio/wav"`. 
+ +- [ ] **Step 3: Run tests and verify they fail** + +Run: + +```bash +cd backend && go test ./internal/pkg/apicompat ./internal/service -run 'InputAudio|Audio' -count=1 +``` + +Expected: FAIL because audio fields and conversions do not exist. + +- [ ] **Step 4: Implement audio types and conversion** + +In `backend/internal/pkg/apicompat/types.go`, add: + +```go +type ChatInputAudio struct { + Data string `json:"data"` + Format string `json:"format"` +} + +type ResponsesInputAudio struct { + Data string `json:"data"` + Format string `json:"format"` +} +``` + +Extend `ChatContentPart`: + +```go + InputAudio *ChatInputAudio `json:"input_audio,omitempty"` +``` + +Extend `ResponsesContentPart`: + +```go + InputAudio *ResponsesInputAudio `json:"input_audio,omitempty"` +``` + +In `chatcompletions_to_responses.go`, change `marshalChatInputContent` and `convertChatContentPartsToResponses` so conversion can return validation errors, and add this format-mapping helper: + +```go +func OpenAIInputAudioFormatToMIMEType(format string) (string, bool) { + switch strings.ToLower(strings.TrimSpace(format)) { + case "wav": + return "audio/wav", true + case "mp3": + return "audio/mpeg", true + case "m4a": + return "audio/mp4", true + case "aac": + return "audio/aac", true + case "flac": + return "audio/flac", true + case "ogg": + return "audio/ogg", true + default: + return "", false + } +} +``` + +For `input_audio`, skip empty audio when other usable parts exist, return `input_audio.data is required` when the message has only empty audio, and return `unsupported input_audio format "<format>"` when the format is non-empty but unmapped. A missing format must also be rejected with HTTP 400, as the design spec requires. 
+ +In `responses_to_anthropic_request.go`, add: + +```go + case "input_audio": + if p.InputAudio == nil { + continue + } + data := strings.TrimSpace(p.InputAudio.Data) + if data == "" { + continue + } + mediaType, ok := OpenAIInputAudioFormatToMIMEType(p.InputAudio.Format) + if !ok { + return nil, fmt.Errorf("unsupported input_audio format %q", p.InputAudio.Format) + } + blocks = append(blocks, AnthropicContentBlock{ + Type: "input_audio", + Source: &AnthropicImageSource{ + Type: "base64", + MediaType: mediaType, + Data: data, + }, + }) +``` + +In `convertClaudeMessagesToGeminiContents`, add an audio case alongside the image case: + +```go + case "input_audio", "audio": + if src, ok := bm["source"].(map[string]any); ok { + if srcType, _ := src["type"].(string); srcType == "base64" { + mediaType, _ := src["media_type"].(string) + data, _ := src["data"].(string) + if strings.TrimSpace(mediaType) != "" && strings.TrimSpace(data) != "" { + parts = append(parts, map[string]any{ + "inlineData": map[string]any{ + "mimeType": mediaType, + "data": data, + }, + }) + } + } + } +``` + +- [ ] **Step 5: Run tests and commit** + +Run: + +```bash +cd backend && go test ./internal/pkg/apicompat ./internal/service -run 'InputAudio|Audio|GeminiForwardAsChatCompletions' -count=1 +``` + +Expected: PASS. 
+ +Commit: + +```bash +git add backend/internal/pkg/apicompat backend/internal/service/gemini_messages_compat_service.go backend/internal/service/gemini_messages_compat_service_test.go +git commit -m "feat: preserve gemini openai input audio" +``` + +--- + +### Task 3: Gemini OpenAI-Compatible Embeddings + +**Files:** +- Create: `backend/internal/service/gemini_openai_embeddings.go` +- Create: `backend/internal/service/gemini_openai_embeddings_test.go` +- Modify: `backend/internal/handler/gemini_openai_compatible_handler.go` +- Test: `backend/internal/handler/gemini_openai_compatible_handler_test.go` + +- [ ] **Step 1: Write service tests** + +Create tests for: + +```go +func TestGeminiForwardOpenAICompatibleEmbeddings_SingleInputUsesEmbedContent(t *testing.T) +func TestGeminiForwardOpenAICompatibleEmbeddings_BatchInputUsesBatchEmbedContents(t *testing.T) +func TestGeminiForwardOpenAICompatibleEmbeddings_RejectsTokenArrayInput(t *testing.T) +``` + +The single-input test should assert: +- upstream URL contains `/v1beta/models/gemini-embedding-2-preview:embedContent` +- upstream JSON contains `content.parts[0].text` +- client response contains `"object":"list"` and `data[0].object == "embedding"` +- returned `ForwardResult.Usage.InputTokens` equals upstream `usageMetadata.promptTokenCount` + +- [ ] **Step 2: Run tests and verify they fail** + +Run: + +```bash +cd backend && go test ./internal/service -run 'OpenAICompatibleEmbeddings' -count=1 +``` + +Expected: FAIL because `ForwardOpenAICompatibleEmbeddings` does not exist. 
+ +- [ ] **Step 3: Implement embeddings service** + +Create `backend/internal/service/gemini_openai_embeddings.go` with these functions: + +```go +type geminiOpenAIEmbeddingsRequest struct { + Model string `json:"model"` + Input json.RawMessage `json:"input"` +} + +func (s *GeminiMessagesCompatService) ForwardOpenAICompatibleEmbeddings(ctx context.Context, c *gin.Context, account *Account, body []byte) (*ForwardResult, error) +func parseGeminiOpenAIEmbeddingInputs(raw json.RawMessage) ([]string, error) +func buildGeminiEmbedContentRequest(input string) []byte +func buildGeminiBatchEmbedContentsRequest(model string, inputs []string) []byte +func geminiEmbeddingURL(baseURL string, model string, batch bool) string +func convertGeminiEmbeddingResponseToOpenAI(body []byte, model string, inputCount int) ([]byte, ClaudeUsage, error) +func writeGeminiOpenAIEmbeddingsError(c *gin.Context, statusCode int, errType string, message string) +``` + +Implementation rules: +- Accept `input` as a string or `[]string`. +- Reject token arrays and nested arrays with HTTP 400 `invalid_request_error`. +- For one input, call `:embedContent`. +- For multiple inputs, call `:batchEmbedContents`. +- Use `x-goog-api-key` for API key accounts and `Authorization: Bearer` for OAuth accounts. +- Use `account.GetGeminiBaseURL(geminicli.AIStudioBaseURL)`. +- Use `account.GetMappedModel` for API key and service account accounts. +- Convert `embedding.values` and `embeddings[].values` into OpenAI embeddings response data. +- Extract `usageMetadata.promptTokenCount` as input tokens when present. 
+ +- [ ] **Step 4: Add handler method and usage recording** + +In `gemini_openai_compatible_handler.go`, add: + +```go +func (h *GatewayHandler) GeminiOpenAICompatibleEmbeddings(c *gin.Context) { + h.handleGeminiOpenAICompatibleUnary(c, geminiOpenAICompatibleUnaryOptions{ + Component: "handler.gemini_openai.embeddings", + Endpoint: int16(service.RequestTypeSync), + Forward: func(ctx context.Context, c *gin.Context, account *service.Account, body []byte) (*service.ForwardResult, error) { + return h.geminiCompatService.ForwardOpenAICompatibleEmbeddings(ctx, c, account, body) + }, + }) +} +``` + +Add `handleGeminiOpenAICompatibleUnary` in the same file. It must: +- call `ensureGeminiOpenAICompatibleGroup` +- read and validate JSON body +- extract `model` +- set ops request and endpoint context +- resolve channel model mapping +- acquire user concurrency slot +- check billing eligibility +- select only Gemini accounts using `SelectAccountWithLoadAwareness` +- acquire account slot +- call the provided `Forward` +- record usage with `gatewayService.RecordUsage` +- fail over on `*service.UpstreamFailoverError` + +- [ ] **Step 5: Run tests and commit** + +Run: + +```bash +cd backend && go test ./internal/service ./internal/handler -run 'OpenAICompatibleEmbeddings|GeminiOpenAICompatible' -count=1 +``` + +Expected: PASS. 
+ +Commit: + +```bash +git add backend/internal/service/gemini_openai_embeddings.go backend/internal/service/gemini_openai_embeddings_test.go backend/internal/handler/gemini_openai_compatible_handler.go backend/internal/handler/gemini_openai_compatible_handler_test.go +git commit -m "feat: add gemini openai embeddings" +``` + +--- + +### Task 4: Gemini OpenAI-Compatible Image Generations + +**Files:** +- Create: `backend/internal/service/gemini_openai_images.go` +- Create: `backend/internal/service/gemini_openai_images_test.go` +- Modify: `backend/internal/handler/gemini_openai_compatible_handler.go` + +- [ ] **Step 1: Write service tests** + +Create tests for: + +```go +func TestGeminiForwardOpenAICompatibleImagesGenerations_ReturnsB64JSON(t *testing.T) +func TestGeminiForwardOpenAICompatibleImagesGenerations_RejectsURLResponseFormat(t *testing.T) +func TestGeminiForwardOpenAICompatibleImagesGenerations_RejectsMultipleImages(t *testing.T) +func TestGeminiForwardOpenAICompatibleImagesGenerations_RequiresImageModel(t *testing.T) +``` + +The success test upstream response body: + +```json +{ + "candidates": [ + { + "content": { + "parts": [ + {"text": "done"}, + {"inlineData": {"mimeType": "image/png", "data": "iVBORw0KGgo="}} + ] + } + } + ], + "usageMetadata": {"promptTokenCount": 10, "candidatesTokenCount": 2} +} +``` + +Assert: +- upstream URL contains `/v1beta/models/gemini-2.5-flash-image:generateContent` +- upstream JSON contains `generationConfig.responseModalities` with `TEXT` and `IMAGE` +- downstream response contains `data[0].b64_json == "iVBORw0KGgo="` +- `ForwardResult.ImageCount == 1` + +- [ ] **Step 2: Run tests and verify they fail** + +Run: + +```bash +cd backend && go test ./internal/service -run 'OpenAICompatibleImages' -count=1 +``` + +Expected: FAIL because `ForwardOpenAICompatibleImagesGenerations` does not exist. 
+ +- [ ] **Step 3: Implement image generation service** + +Create `backend/internal/service/gemini_openai_images.go` with these functions: + +```go +type geminiOpenAIImageGenerationRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + N *int `json:"n,omitempty"` + Size string `json:"size,omitempty"` + ResponseFormat string `json:"response_format,omitempty"` +} + +func (s *GeminiMessagesCompatService) ForwardOpenAICompatibleImagesGenerations(ctx context.Context, c *gin.Context, account *Account, body []byte) (*ForwardResult, error) +func parseGeminiOpenAIImageGenerationRequest(body []byte) (*geminiOpenAIImageGenerationRequest, error) +func buildGeminiImageGenerateContentRequest(prompt string, size string) []byte +func collectGeminiOpenAIImages(raw []byte) ([]string, string, ClaudeUsage, error) +func buildGeminiOpenAIImagesResponse(created int64, images []string, revisedPrompt string) []byte +func writeGeminiOpenAIImagesError(c *gin.Context, statusCode int, errType string, message string) +``` + +Implementation rules: +- Require non-empty `model` and `prompt`. +- Require `isImageGenerationModel(model)`. +- Allow `response_format` empty or `b64_json`. +- Reject `response_format == "url"` with HTTP 400 because sub2api has no URL storage path here. +- Allow `n` empty or `1`; reject `n > 1` with HTTP 400. +- Send native Gemini `generateContent` with `responseModalities: ["TEXT", "IMAGE"]`. +- Parse `inlineData.data` and `inline_data.data`. +- Return the OpenAI Images response shape `{"created": <unix-timestamp>, "data": [{"b64_json": "<base64-image>"}]}`; tests use the fixture value `iVBORw0KGgo=`, and runtime returns the actual generated base64 value. +- Return a `ForwardResult` with `ImageCount`, `ImageSize`, `ImageInputSize`, and `Usage`. 
+ +- [ ] **Step 4: Add handler method** + +In `gemini_openai_compatible_handler.go`, add: + +```go +func (h *GatewayHandler) GeminiOpenAICompatibleImagesGenerations(c *gin.Context) { + h.handleGeminiOpenAICompatibleUnary(c, geminiOpenAICompatibleUnaryOptions{ + Component: "handler.gemini_openai.images", + Endpoint: int16(service.RequestTypeSync), + BeforeForward: func(c *gin.Context, apiKey *service.APIKey, subject servermiddleware.AuthSubject, model string, body []byte) bool { + if !service.GroupAllowsImageGeneration(apiKey.Group) { + geminiOpenAICompatError(c, http.StatusForbidden, "permission_error", service.ImageGenerationPermissionMessage()) + return false + } + reqLog := requestLogger(c, "handler.gemini_openai.images", zap.Int64("user_id", subject.UserID), zap.Int64("api_key_id", apiKey.ID), zap.Any("group_id", apiKey.GroupID), zap.String("model", model)) + if decision := h.checkContentModeration(c, reqLog, apiKey, subject, service.ContentModerationProtocolOpenAIImages, model, body); decision != nil && decision.Blocked { + geminiOpenAICompatError(c, contentModerationStatus(decision), contentModerationErrorCode(decision), decision.Message) + return false + } + return true + }, + Forward: func(ctx context.Context, c *gin.Context, account *service.Account, body []byte) (*service.ForwardResult, error) { + return h.geminiCompatService.ForwardOpenAICompatibleImagesGenerations(ctx, c, account, body) + }, + }) +} +``` + +- [ ] **Step 5: Run tests and commit** + +Run: + +```bash +cd backend && go test ./internal/service ./internal/handler -run 'OpenAICompatibleImages|GeminiOpenAICompatible' -count=1 +``` + +Expected: PASS. 
+ +Commit: + +```bash +git add backend/internal/service/gemini_openai_images.go backend/internal/service/gemini_openai_images_test.go backend/internal/handler/gemini_openai_compatible_handler.go backend/internal/handler/gemini_openai_compatible_handler_test.go +git commit -m "feat: add gemini openai image generations" +``` + +--- + +### Task 5: Endpoint Normalization and Ops Classification + +**Files:** +- Modify: `backend/internal/handler/endpoint.go` +- Modify: `backend/internal/handler/endpoint_test.go` + +- [ ] **Step 1: Write endpoint tests** + +Add cases: + +```go +{"/v1beta/openai/embeddings", EndpointEmbeddings}, +{"/v1beta/openai/images/generations", EndpointImagesGenerations}, +``` + +Also add platform normalization assertions that `service.PlatformGemini` keeps the same endpoint classes. + +- [ ] **Step 2: Run tests and verify failure if normalization misses the new prefix** + +Run: + +```bash +cd backend && go test ./internal/handler -run 'Endpoint' -count=1 +``` + +Expected: FAIL if current normalization does not classify new paths. + +- [ ] **Step 3: Implement endpoint normalization** + +Update `NormalizeInboundEndpoint` logic so path contains checks for `/embeddings` and `/images/generations` apply to `/v1beta/openai/*` as they already do for `/v1/*` and `/openai/v1/*`. + +- [ ] **Step 4: Run tests and commit** + +Run: + +```bash +cd backend && go test ./internal/handler -run 'Endpoint' -count=1 +``` + +Expected: PASS. 
+ +Commit: + +```bash +git add backend/internal/handler/endpoint.go backend/internal/handler/endpoint_test.go +git commit -m "chore: classify gemini openai endpoints" +``` + +--- + +### Task 6: Frontend Use Key Modal + +**Files:** +- Modify: `frontend/src/components/keys/UseKeyModal.vue` +- Modify: `frontend/src/components/keys/__tests__/UseKeyModal.spec.ts` +- Modify: `frontend/src/i18n/locales/zh.ts` +- Modify: `frontend/src/i18n/locales/en.ts` + +- [ ] **Step 1: Write frontend tests** + +Add tests: + +```ts +it('renders native Gemini CLI config with v1beta base URL', () => { + const wrapper = mount(UseKeyModal, { + props: { + show: true, + apiKey: 'sk-gemini', + baseUrl: 'https://example.com/v1', + platform: 'gemini' + }, + global: { + stubs: { + BaseDialog: { template: '
' }, + Icon: { template: '' } + } + } + }) + + const code = wrapper.find('pre code').text() + expect(code).toContain('GOOGLE_GEMINI_BASE_URL="https://example.com/v1beta"') + expect(code).toContain('GEMINI_API_KEY="sk-gemini"') +}) + +it('renders Gemini OpenAI-compatible config with v1beta openai base URL', async () => { + const wrapper = mount(UseKeyModal, { + props: { + show: true, + apiKey: 'sk-gemini', + baseUrl: 'https://example.com/v1', + platform: 'gemini' + }, + global: { + stubs: { + BaseDialog: { template: '
' }, + Icon: { template: '' } + } + } + }) + + const tab = wrapper.findAll('button').find((button) => + button.text().includes('keys.useKeyModal.cliTabs.openaiCompatible') + ) + expect(tab).toBeDefined() + await tab!.trigger('click') + await nextTick() + + const codeBlocks = wrapper.findAll('pre code').map((code) => code.text()) + expect(codeBlocks.join('\n')).toContain('OPENAI_BASE_URL="https://example.com/v1beta/openai"') + expect(codeBlocks.join('\n')).toContain('base_url="https://example.com/v1beta/openai"') +}) +``` + +- [ ] **Step 2: Run tests and verify they fail** + +Run: + +```bash +cd frontend && pnpm test:run src/components/keys/__tests__/UseKeyModal.spec.ts +``` + +Expected: FAIL because the tab and `/v1beta` base URL are not implemented. + +- [ ] **Step 3: Implement UI changes** + +In `UseKeyModal.vue`: +- Add Gemini tab: + +```ts +{ id: 'openai-compatible', label: t('keys.useKeyModal.cliTabs.openaiCompatible'), icon: TerminalIcon } +``` + +- Compute: + +```ts +const geminiOpenAIBase = `${baseRoot}/v1beta/openai` +``` + +- Change native Gemini current files to: + +```ts +if (activeClientTab.value === 'openai-compatible') { + return generateGeminiOpenAICompatibleFiles(geminiOpenAIBase, apiKey) +} +return [generateGeminiCliContent(geminiBase, apiKey)] +``` + +- Change Antigravity Gemini current files to: + +```ts +return [generateGeminiCliContent(antigravityGeminiBase, apiKey)] +``` + +- Add: + +```ts +function generateGeminiOpenAICompatibleFiles(baseUrl: string, apiKey: string): FileConfig[] { + const model = 'gemini-3.5-flash' + const shellPath = activeTab.value === 'cmd' + ? 'Command Prompt' + : activeTab.value === 'powershell' + ? 'PowerShell' + : 'Terminal' + const shellContent = activeTab.value === 'cmd' + ? `set OPENAI_BASE_URL=${baseUrl} +set OPENAI_API_KEY=${apiKey} +set OPENAI_MODEL=${model}` + : activeTab.value === 'powershell' + ? 
`$env:OPENAI_BASE_URL="${baseUrl}" +$env:OPENAI_API_KEY="${apiKey}" +$env:OPENAI_MODEL="${model}"` + : `export OPENAI_BASE_URL="${baseUrl}" +export OPENAI_API_KEY="${apiKey}" +export OPENAI_MODEL="${model}"` + + const pythonContent = `from openai import OpenAI + +client = OpenAI( + api_key="${apiKey}", + base_url="${baseUrl}", +) + +response = client.chat.completions.create( + model="${model}", + messages=[{"role": "user", "content": "Hello"}], +) + +print(response.choices[0].message.content)` + + return [ + { path: shellPath, content: shellContent }, + { path: 'openai_gemini.py', content: pythonContent, hint: t('keys.useKeyModal.gemini.openaiPythonHint') } + ] +} +``` + +- [ ] **Step 4: Add i18n copy** + +In both locale files add: + +```ts +openaiCompatible: 'OpenAI Compatible' +``` + +In `zh.ts`, add `gemini.openaiPythonHint`: + +```ts +openaiPythonHint: 'OpenAI SDK 示例,base_url 使用 Gemini OpenAI 兼容路径' +``` + +In `en.ts`, add: + +```ts +openaiPythonHint: 'OpenAI SDK example using the Gemini OpenAI-compatible base URL' +``` + +- [ ] **Step 5: Run tests and commit** + +Run: + +```bash +cd frontend && pnpm test:run src/components/keys/__tests__/UseKeyModal.spec.ts +``` + +Expected: PASS. + +Commit: + +```bash +git add frontend/src/components/keys/UseKeyModal.vue frontend/src/components/keys/__tests__/UseKeyModal.spec.ts frontend/src/i18n/locales/zh.ts frontend/src/i18n/locales/en.ts +git commit -m "feat: show gemini openai compatible key usage" +``` + +--- + +### Task 7: Full Verification + +**Files:** +- No implementation files unless tests expose a real defect. + +- [ ] **Step 1: Run backend focused packages** + +Run: + +```bash +cd backend && go test ./internal/server/routes ./internal/handler ./internal/pkg/apicompat ./internal/service +``` + +Expected: PASS. + +- [ ] **Step 2: Run frontend focused tests** + +Run: + +```bash +cd frontend && pnpm test:run src/components/keys/__tests__/UseKeyModal.spec.ts +``` + +Expected: PASS. 
+ +- [ ] **Step 3: Run formatting and diff checks** + +Run: + +```bash +gofmt -w backend/internal/server/routes/gateway.go backend/internal/handler/gemini_openai_compatible_handler.go backend/internal/pkg/apicompat/types.go backend/internal/pkg/apicompat/chatcompletions_to_responses.go backend/internal/pkg/apicompat/responses_to_anthropic_request.go backend/internal/service/gemini_messages_compat_service.go backend/internal/service/gemini_openai_embeddings.go backend/internal/service/gemini_openai_images.go +git diff --check +git status --short +``` + +Expected: +- `git diff --check` exits 0. +- `git status --short` shows only intentional files before the final commit, or is clean after the task commits. + +- [ ] **Step 4: Optional local smoke request** + +If a local dev server is already running, issue: + +```bash +curl -sS "$SUB2API_BASE_URL/v1beta/openai/models" \ + -H "Authorization: Bearer $SUB2API_GEMINI_KEY" | jq . +``` + +Expected: JSON object with `"object": "list"` and model IDs without the `models/` prefix. + +- [ ] **Step 5: Final commit if verification caused follow-up fixes** + +If verification required follow-up edits: + +```bash +git add <files-changed-by-follow-up-fixes> +git commit -m "fix: stabilize gemini openai compatibility" +``` + +If there were no follow-up edits, skip this commit. + +--- + +## Self-Review + +**Spec coverage:** +- `/v1beta/openai` Gemini-only routing: Task 1. +- OpenAI SDK chat completions base URL: Task 1 and Task 6. +- Audio `input_audio` to Gemini `inlineData`: Task 2. +- Models endpoint OpenAI shape: Task 1. +- Embeddings endpoint native Gemini conversion and OpenAI response: Task 3. +- Image generation endpoint native Gemini conversion and OpenAI response: Task 4. +- Explicit unsupported video endpoint: Task 1. +- Frontend Use Key modal with native Gemini plus OpenAI-compatible Gemini guidance: Task 6. + +**Placeholder scan:** This plan contains concrete file paths, test names, commands, expected failures, expected passing states, and implementation contracts. 
It avoids deferred requirements. + +**Type consistency:** +- `ChatInputAudio`, `ResponsesInputAudio`, `InputAudio`, and `OpenAIInputAudioFormatToMIMEType` are introduced before later tasks consume them. +- Route middleware and handler-level defensive group checks use the same error message and OpenAI-compatible error shape. +- Handler method names in routes match the handler task names. +- Service method names in handler tasks match service task names. diff --git a/docs/superpowers/specs/2026-05-29-gemini-openai-compatible-design.md b/docs/superpowers/specs/2026-05-29-gemini-openai-compatible-design.md new file mode 100644 index 00000000000..03789aa0a85 --- /dev/null +++ b/docs/superpowers/specs/2026-05-29-gemini-openai-compatible-design.md @@ -0,0 +1,265 @@ +# Gemini OpenAI-Compatible `/v1beta/openai` Design + +## Context + +Google Gemini exposes an OpenAI-compatible API under `/v1beta/openai`. In sub2api, Gemini currently has two separate surfaces: + +- Gemini native REST endpoints under `/v1beta/models...`. +- OpenAI Chat Completions compatibility through `/v1/chat/completions`, which routes Gemini groups through `GeminiMessagesCompatService`. + +There is no sub2api route for `/v1beta/openai/*` today. Clients that follow Google's documented OpenAI SDK shape and set `base_url` to a `/v1beta/openai` prefix will post to `/v1beta/openai/chat/completions` and receive 404. The existing Gemini Chat Completions conversion path also does not preserve OpenAI `input_audio` content parts; unknown parts can become text instead of Gemini `inlineData`. + +## Goals + +- Add a Gemini-only OpenAI-compatible entrypoint at `/v1beta/openai`. +- Preserve the existing sub2api routing, account selection, billing, model mapping, failover, moderation, and ops logging behavior. +- Support OpenAI SDK clients that use `base_url=/v1beta/openai`. +- Cover the documented Gemini OpenAI-compatible prefix with either a working endpoint or an explicit unsupported-endpoint error. 
+- Support audio input in Chat Completions by converting OpenAI `input_audio` parts to Gemini `inlineData`. +- Update frontend key-usage guidance so users can discover the new OpenAI-compatible Gemini base URL. + +## Non-Goals + +- Do not make `/v1beta/openai` a generic alias for all platforms. +- Do not bypass sub2api's Gemini scheduling by blindly proxying to Google's `/v1beta/openai` upstream. +- Do not add OpenAI Responses, Realtime, or arbitrary future OpenAI endpoints under this prefix. +- Do not implement Gemini video generation until sub2api has a video-operation backend and billing path. +- Do not replace the existing Gemini CLI/native `/v1beta` guidance. + +## Route Design + +Register a new route group: + +```text +/v1beta/openai +``` + +Supported endpoints: + +```text +GET /v1beta/openai/models +POST /v1beta/openai/chat/completions +POST /v1beta/openai/embeddings +POST /v1beta/openai/images/generations +``` + +The route group uses the same Google/Gemini API key auth style as native `/v1beta` routes: + +- `Authorization: Bearer <api-key>` +- `x-goog-api-key: <api-key>` +- `key=<api-key>` +- `api_key=<api-key>` + +The route group must require the API key's group platform to be `gemini`. If the group is missing or not Gemini, return an explicit JSON error rather than falling through to OpenAI or Anthropic routing. + +Unsupported subpaths under `/v1beta/openai/*` return a clear OpenAI-style JSON error, for example: + +```json +{ + "error": { + "type": "invalid_request_error", + "message": "Unsupported endpoint for Gemini OpenAI compatibility" + } +} +``` + +Known official Gemini OpenAI-compatible paths that sub2api does not yet have backend support for, such as video operations, should be registered only if needed to return explicit unsupported errors. They must not fall through to unrelated OpenAI or Gemini native handlers. 
+ +## Chat Completions Flow + +`POST /v1beta/openai/chat/completions` should reuse the existing Gemini Chat Completions path: + +```text +Gateway.ChatCompletions + -> GeminiMessagesCompatService.ForwardAsChatCompletions + -> Gemini native generateContent / streamGenerateContent +``` + +This keeps existing behavior for: + +- user/account concurrency limits +- Gemini account selection and failover +- model mapping +- channel restrictions +- content moderation +- usage and billing records +- ops error logging +- streaming and non-streaming response conversion + +The new route should differ only in external path compatibility and platform guard semantics. + +## Audio Input Conversion + +OpenAI-compatible clients can send audio in Chat Completions content parts: + +```json +{ + "type": "input_audio", + "input_audio": { + "data": "", + "format": "wav" + } +} +``` + +The converter must preserve this as a real Gemini media part: + +```json +{ + "inlineData": { + "mimeType": "audio/wav", + "data": "" + } +} +``` + +Supported format mapping: + +```text +wav -> audio/wav +mp3 -> audio/mpeg +m4a -> audio/mp4 +aac -> audio/aac +flac -> audio/flac +ogg -> audio/ogg +``` + +If `data` is missing, empty, or only whitespace, the converter should drop the audio part when other usable parts exist, matching the existing empty image handling pattern. If the message contains only an empty audio part, return a validation error rather than sending an empty Gemini `parts` list. + +If `format` is missing or unsupported, return HTTP 400 with a specific error. Do not stringify unknown `input_audio` blocks into prompt text. 
+ +The likely implementation seam is: + +- extend `apicompat.ChatContentPart` with an `InputAudio` field +- preserve audio parts through Chat Completions to Responses conversion +- preserve audio parts through Responses to Anthropic conversion +- teach `convertClaudeMessagesToGeminiContents` to convert audio blocks to `inlineData` + +If preserving audio through the generic Anthropic bridge would create broad risk, a narrower Gemini-specific conversion helper may be used, but it must remain covered by tests and must not duplicate the whole gateway flow. + +## Models Endpoint + +`GET /v1beta/openai/models` returns OpenAI-compatible model-list JSON: + +```json +{ + "object": "list", + "data": [ + { + "id": "gemini-2.5-flash", + "object": "model", + "created": 0, + "owned_by": "google" + } + ] +} +``` + +The data source should reuse the existing Gemini model list path or curated fallback list. This endpoint should not require a separate upstream call shape unless existing model-list behavior already does so. + +## Embeddings Endpoint + +`POST /v1beta/openai/embeddings` should provide OpenAI-compatible embeddings for Gemini groups. + +- accept OpenAI embeddings request shape +- convert to Gemini native `embedContent` or `batchEmbedContents` +- convert Gemini response to OpenAI embeddings response shape +- record input-token usage and zero output-token usage + +This endpoint must not route to the OpenAI platform embeddings handler. + +## Image Generation Endpoint + +`POST /v1beta/openai/images/generations` should provide OpenAI-compatible image generation for Gemini image models. 
+ +Request handling: + +- accept OpenAI `images.generate` request shape with `model`, `prompt`, optional `response_format`, and optional `n` +- require a Gemini image-capable model such as `gemini-2.5-flash-image` or `gemini-3.1-flash-image` +- convert the prompt into Gemini native `generateContent` +- request image output from Gemini using the same native image-generation path already used by Gemini image models +- preserve existing Gemini account selection, image billing, moderation, failover, and ops logging behavior + +Response handling: + +- return OpenAI image response shape with `data[].b64_json` +- support `response_format=b64_json`; if `response_format=url` is requested, return HTTP 400 unless a real URL-backed storage path exists +- support `n` absent or `n=1`; if `n>1`, return HTTP 400 unless the implementation deliberately fans out multiple Gemini calls with billing for each image + +This endpoint must not route to the OpenAI platform images handler. + +## Frontend Adaptation + +Update the user key usage modal for Gemini groups. + +Keep the existing Gemini CLI guidance unchanged: + +```text +GOOGLE_GEMINI_BASE_URL=<base-url>/v1beta +GEMINI_API_KEY=<api-key> +``` + +Add an OpenAI-compatible Gemini tab with: + +```text +OPENAI_BASE_URL=<base-url>/v1beta/openai +OPENAI_API_KEY=<api-key> +``` + +Also include a minimal Python OpenAI SDK example: + +```python +from openai import OpenAI + +client = OpenAI( + api_key="<api-key>", + base_url="<base-url>/v1beta/openai", +) +``` + +OpenCode guidance should not replace the existing Google provider example. If an OpenAI-provider example is added, label it as OpenAI-compatible Gemini and use `/v1beta/openai`. + +Add or update Chinese and English i18n strings for: + +- Gemini OpenAI-compatible tab label +- tab description +- usage note that this prefix is Gemini-only + +## Error Handling + +- Non-Gemini group: return an explicit platform error. +- Unsupported `/v1beta/openai/*` endpoint: return OpenAI-style unsupported-endpoint JSON. 
+- Unsupported audio format: return HTTP 400 and name the unsupported format. +- Empty audio-only message: return HTTP 400. +- Unsupported image `response_format` or `n>1`: return HTTP 400 with a specific message. +- Video endpoints and other known-but-unimplemented Gemini OpenAI-compatible endpoints: return an explicit unsupported-endpoint error. +- Upstream Gemini errors: preserve existing Gemini Chat Completions error mapping and failover behavior. + +## Testing + +Backend tests: + +- route test: `/v1beta/openai/chat/completions` is registered and does not 404. +- platform guard test: non-Gemini groups are rejected for `/v1beta/openai/*`. +- chat routing test: Gemini OpenAI-compatible route reaches `GeminiMessagesCompatService.ForwardAsChatCompletions`. +- audio conversion test: `input_audio` with `format=wav` produces Gemini `inlineData.mimeType=audio/wav` and preserves base64 data. +- audio format validation test: unsupported `format` returns HTTP 400. +- empty audio validation test: audio-only empty payload returns HTTP 400. +- embeddings test: OpenAI embeddings request converts to Gemini embedding upstream and returns OpenAI embeddings shape. +- image generation test: OpenAI image generation request converts to Gemini image upstream and returns `b64_json`. +- image validation test: unsupported image `response_format=url` or `n>1` returns HTTP 400. +- unsupported endpoint test: `/v1beta/openai/videos` returns explicit unsupported error rather than 404 or wrong-platform routing. +- regression test: existing `/v1beta/models/{model}:generateContent` still works. +- regression test: existing Gemini `/v1/chat/completions` still works. + +Frontend tests: + +- Gemini key usage modal has an OpenAI-compatible tab. +- Generated Gemini OpenAI-compatible base URL is `/v1beta/openai`. +- Existing Gemini CLI base URL remains `/v1beta`. 
+- The OpenAI-compatible tab mentions Chat Completions, embeddings, and image generation, and notes that video is not available until backend support exists. +- i18n keys render for English and Chinese. + +## Rollout Notes + +This is backward compatible for existing clients because it only adds new routes and preserves existing Gemini CLI/native paths. The risky part is audio conversion through shared compatibility structures; tests should focus on ensuring existing text/image Chat Completions behavior is unchanged. diff --git a/frontend/src/components/keys/UseKeyModal.vue b/frontend/src/components/keys/UseKeyModal.vue index 18440c60111..6b9f673d690 100644 --- a/frontend/src/components/keys/UseKeyModal.vue +++ b/frontend/src/components/keys/UseKeyModal.vue @@ -280,6 +280,7 @@ const clientTabs = computed((): TabConfig[] => { case 'gemini': return [ { id: 'gemini', label: t('keys.useKeyModal.cliTabs.geminiCli'), icon: SparkleIcon }, + { id: 'openai-compatible', label: t('keys.useKeyModal.cliTabs.openaiCompatible'), icon: TerminalIcon }, { id: 'opencode', label: t('keys.useKeyModal.cliTabs.opencode'), icon: TerminalIcon } ] case 'antigravity': @@ -378,7 +379,11 @@ const comment = (value: string) => wrapToken('text-slate-500', value) const currentFiles = computed((): FileConfig[] => { const baseUrl = props.baseUrl || window.location.origin const apiKey = props.apiKey - const baseRoot = baseUrl.replace(/\/v1\/?$/, '').replace(/\/+$/, '') + const baseRoot = baseUrl + .replace(/\/v1beta\/openai\/?$/, '') + .replace(/\/v1beta\/?$/, '') + .replace(/\/v1\/?$/, '') + .replace(/\/+$/, '') const ensureV1 = (value: string) => { const trimmed = value.replace(/\/+$/, '') return trimmed.endsWith('/v1') ? trimmed : `${trimmed}/v1` @@ -393,6 +398,7 @@ const currentFiles = computed((): FileConfig[] => { const trimmed = baseRoot.replace(/\/+$/, '') return trimmed.endsWith('/v1beta') ? 
trimmed : `${trimmed}/v1beta` })() + const geminiOpenAIBase = `${baseRoot}/v1beta/openai` if (activeClientTab.value === 'opencode') { switch (props.platform) { @@ -422,10 +428,13 @@ const currentFiles = computed((): FileConfig[] => { } return generateOpenAIFiles(baseUrl, apiKey) case 'gemini': - return [generateGeminiCliContent(baseUrl, apiKey)] + if (activeClientTab.value === 'openai-compatible') { + return generateGeminiOpenAICompatibleFiles(geminiOpenAIBase, apiKey) + } + return [generateGeminiCliContent(geminiBase, apiKey)] case 'antigravity': if (activeClientTab.value === 'gemini') { - return [generateGeminiCliContent(`${baseUrl}/antigravity`, apiKey)] + return [generateGeminiCliContent(antigravityGeminiBase, apiKey)] } return generateAnthropicFiles(`${baseUrl}/antigravity`, apiKey) default: @@ -525,6 +534,53 @@ ${keyword('$env:')}${variable('GEMINI_MODEL')}${operator('=')}${string(`"${model return { path, content, highlighted } } +function generateGeminiOpenAICompatibleFiles(baseUrl: string, apiKey: string): FileConfig[] { + const model = 'gemini-3.5-flash' + let path: string + let content: string + + switch (activeTab.value) { + case 'cmd': + path = 'Command Prompt' + content = `set OPENAI_BASE_URL=${baseUrl} +set OPENAI_API_KEY=${apiKey} +set OPENAI_MODEL=${model}` + break + case 'powershell': + path = 'PowerShell' + content = `$env:OPENAI_BASE_URL="${baseUrl}" +$env:OPENAI_API_KEY="${apiKey}" +$env:OPENAI_MODEL="${model}"` + break + case 'unix': + default: + path = 'Terminal' + content = `export OPENAI_BASE_URL="${baseUrl}" +export OPENAI_API_KEY="${apiKey}" +export OPENAI_MODEL="${model}"` + break + } + + const pythonContent = `from openai import OpenAI + +client = OpenAI( + api_key="${apiKey}", + base_url="${baseUrl}", +) + +response = client.chat.completions.create( + model="${model}", + messages=[{"role": "user", "content": "Hello"}], +) + +print(response.choices[0].message.content)` + + return [ + { path, content }, + { path: 'openai_gemini.py', 
content: pythonContent, hint: t('keys.useKeyModal.gemini.openaiPythonHint') } + ] +} + function generateOpenAIFiles(baseUrl: string, apiKey: string): FileConfig[] { const isWindows = activeTab.value === 'windows' const configDir = isWindows ? '%userprofile%\\.codex' : '~/.codex' diff --git a/frontend/src/components/keys/__tests__/UseKeyModal.spec.ts b/frontend/src/components/keys/__tests__/UseKeyModal.spec.ts index b3fdeb937ab..9a04fd59e6e 100644 --- a/frontend/src/components/keys/__tests__/UseKeyModal.spec.ts +++ b/frontend/src/components/keys/__tests__/UseKeyModal.spec.ts @@ -122,4 +122,62 @@ describe('UseKeyModal', () => { expect(codeBlock.text()).toContain('"name": "GPT-5.4 Mini"') expect(codeBlock.text()).not.toContain('"name": "GPT-5.4 Nano"') }) + + it('renders native Gemini CLI config with v1beta base URL', () => { + const wrapper = mount(UseKeyModal, { + props: { + show: true, + apiKey: 'sk-gemini', + baseUrl: 'https://example.com/v1', + platform: 'gemini' + }, + global: { + stubs: { + BaseDialog: { + template: '
' + }, + Icon: { + template: '' + } + } + } + }) + + const code = wrapper.find('pre code').text() + expect(code).toContain('GOOGLE_GEMINI_BASE_URL="https://example.com/v1beta"') + expect(code).toContain('GEMINI_API_KEY="sk-gemini"') + }) + + it('renders Gemini OpenAI-compatible config with v1beta openai base URL', async () => { + const wrapper = mount(UseKeyModal, { + props: { + show: true, + apiKey: 'sk-gemini', + baseUrl: 'https://example.com/v1', + platform: 'gemini' + }, + global: { + stubs: { + BaseDialog: { + template: '
' + }, + Icon: { + template: '' + } + } + } + }) + + const tab = wrapper.findAll('button').find((button) => + button.text().includes('keys.useKeyModal.cliTabs.openaiCompatible') + ) + + expect(tab).toBeDefined() + await tab!.trigger('click') + await nextTick() + + const codeBlocks = wrapper.findAll('pre code').map((code) => code.text()) + expect(codeBlocks.join('\n')).toContain('OPENAI_BASE_URL="https://example.com/v1beta/openai"') + expect(codeBlocks.join('\n')).toContain('base_url="https://example.com/v1beta/openai"') + }) }) diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index 6735029c7c3..7468cfbdb48 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -750,6 +750,7 @@ export default { cliTabs: { claudeCode: 'Claude Code', geminiCli: 'Gemini CLI', + openaiCompatible: 'OpenAI-compatible', codexCli: 'Codex CLI', codexCliWs: 'Codex CLI (WebSocket)', opencode: 'OpenCode', @@ -764,6 +765,7 @@ export default { gemini: { description: 'Add the following environment variables to your terminal profile or run directly in terminal to configure Gemini CLI access.', modelComment: 'If you have Gemini 3 access, you can use: gemini-3-pro-preview', + openaiPythonHint: 'Python example using the OpenAI SDK against the Gemini OpenAI-compatible endpoint.', note: 'These environment variables will be active in the current terminal session. 
For permanent configuration, add them to ~/.bashrc, ~/.zshrc, or the appropriate configuration file.', }, opencode: { diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index abb8dff730e..cec201ef3b7 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -750,6 +750,7 @@ export default { cliTabs: { claudeCode: 'Claude Code', geminiCli: 'Gemini CLI', + openaiCompatible: 'OpenAI 兼容', codexCli: 'Codex CLI', codexCliWs: 'Codex CLI (WebSocket)', opencode: 'OpenCode' @@ -767,6 +768,7 @@ export default { description: '将以下环境变量添加到您的终端配置文件或直接在终端中运行,以配置 Gemini CLI 访问。', modelComment: '如果你有 Gemini 3 权限可以填:gemini-3-pro-preview', + openaiPythonHint: '使用 OpenAI SDK 调用 Gemini OpenAI 兼容端点的 Python 示例。', note: '这些环境变量将在当前终端会话中生效。如需永久配置,请将其添加到 ~/.bashrc、~/.zshrc 或相应的配置文件中。' }, opencode: {