Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/costs.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { LlmTokenUsage } from "./llm/generate-text.js";

export type LlmProvider = "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "cli";
export type LlmProvider = "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "vertex" | "cli";

export type LlmCall = {
provider: LlmProvider;
Expand Down
2 changes: 2 additions & 0 deletions src/daemon/flow-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ export function createDaemonUrlFlowContext(args: DaemonUrlFlowContextArgs): UrlF
firecrawlConfigured,
googleConfigured,
anthropicConfigured,
vertexConfig,
cliAvailability,
envForAuto,
apifyToken,
Expand Down Expand Up @@ -268,6 +269,7 @@ export function createDaemonUrlFlowContext(args: DaemonUrlFlowContextArgs): UrlF
zai: { apiKey: zaiApiKey, baseUrl: zaiBaseUrl },
nvidia: { apiKey: nvidiaApiKey, baseUrl: nvidiaBaseUrl },
providerBaseUrls,
vertexConfig: vertexConfig ?? null,
});

const outputLanguage = resolveOutputLanguageSetting({
Expand Down
99 changes: 96 additions & 3 deletions src/llm/generate-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ import {
normalizeAnthropicModelAccessError,
} from "./providers/anthropic.js";
import { completeGoogleDocument, completeGoogleText } from "./providers/google.js";
import { completeVertexText, completeVertexDocument, type VertexConfig } from "./providers/vertex.js";
import {
resolveAnthropicModel,
resolveGoogleModel,
resolveVertexModel,
resolveOpenAiModel,
resolveXaiModel,
resolveNvidiaModel,
Expand Down Expand Up @@ -164,6 +166,7 @@ export async function generateTextWithModelId({
anthropicBaseUrlOverride,
googleBaseUrlOverride,
xaiBaseUrlOverride,
vertexConfig,
forceChatCompletions,
retries = 0,
onRetry,
Expand All @@ -180,13 +183,14 @@ export async function generateTextWithModelId({
anthropicBaseUrlOverride?: string | null;
googleBaseUrlOverride?: string | null;
xaiBaseUrlOverride?: string | null;
vertexConfig?: VertexConfig | null;
forceChatCompletions?: boolean;
retries?: number;
onRetry?: (notice: RetryNotice) => void;
}): Promise<{
text: string;
canonicalModelId: string;
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia";
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "vertex";
usage: LlmTokenUsage | null;
}> {
const parsed = parseGatewayStyleModelId(modelId);
Expand Down Expand Up @@ -281,6 +285,30 @@ export async function generateTextWithModelId({
};
}

if (parsed.provider === "vertex") {
if (!vertexConfig)
throw new Error(
"Missing Vertex AI configuration. Set GOOGLE_CLOUD_PROJECT, GOOGLE_CLOUD_LOCATION, " +
"and VERTEX_AI_SERVICE_ACCOUNT_KEY (or GOOGLE_APPLICATION_CREDENTIALS).",
);
const result = await completeVertexDocument({
modelId: parsed.model,
vertexConfig,
promptText: prompt.userText,
document: documentAttachment,
maxOutputTokens,
temperature: effectiveTemperature,
timeoutMs,
fetchImpl,
});
return {
text: result.text,
canonicalModelId: parsed.canonical,
provider: parsed.provider,
usage: result.usage,
};
}

throw createUnsupportedFunctionalityError(
`document attachments are not supported for ${parsed.provider}/... models`,
);
Expand Down Expand Up @@ -375,6 +403,28 @@ export async function generateTextWithModelId({
};
}

if (parsed.provider === "vertex") {
if (!vertexConfig)
throw new Error(
"Missing Vertex AI configuration. Set GOOGLE_CLOUD_PROJECT, GOOGLE_CLOUD_LOCATION, " +
"and VERTEX_AI_SERVICE_ACCOUNT_KEY (or GOOGLE_APPLICATION_CREDENTIALS).",
);
const result = await completeVertexText({
modelId: parsed.model,
vertexConfig,
context,
temperature: effectiveTemperature,
maxOutputTokens,
signal: controller.signal,
});
return {
text: result.text,
canonicalModelId: parsed.canonical,
provider: parsed.provider,
usage: result.usage,
};
}

if (parsed.provider === "anthropic") {
const apiKey = apiKeys.anthropicApiKey;
if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY for anthropic/... model");
Expand Down Expand Up @@ -479,6 +529,7 @@ export async function streamTextWithModelId({
anthropicBaseUrlOverride,
googleBaseUrlOverride,
xaiBaseUrlOverride,
vertexConfig,
forceChatCompletions,
}: {
modelId: string;
Expand All @@ -493,11 +544,12 @@ export async function streamTextWithModelId({
anthropicBaseUrlOverride?: string | null;
googleBaseUrlOverride?: string | null;
xaiBaseUrlOverride?: string | null;
vertexConfig?: VertexConfig | null;
forceChatCompletions?: boolean;
}): Promise<{
textStream: AsyncIterable<string>;
canonicalModelId: string;
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia";
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "vertex";
usage: Promise<LlmTokenUsage | null>;
lastError: () => unknown;
}> {
Expand All @@ -515,6 +567,7 @@ export async function streamTextWithModelId({
anthropicBaseUrlOverride,
googleBaseUrlOverride,
xaiBaseUrlOverride,
vertexConfig,
forceChatCompletions,
});
}
Expand All @@ -532,6 +585,7 @@ export async function streamTextWithContext({
anthropicBaseUrlOverride,
googleBaseUrlOverride,
xaiBaseUrlOverride,
vertexConfig,
forceChatCompletions,
}: {
modelId: string;
Expand All @@ -546,11 +600,12 @@ export async function streamTextWithContext({
anthropicBaseUrlOverride?: string | null;
googleBaseUrlOverride?: string | null;
xaiBaseUrlOverride?: string | null;
vertexConfig?: VertexConfig | null;
forceChatCompletions?: boolean;
}): Promise<{
textStream: AsyncIterable<string>;
canonicalModelId: string;
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia";
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "vertex";
usage: Promise<LlmTokenUsage | null>;
lastError: () => unknown;
}> {
Expand Down Expand Up @@ -701,6 +756,44 @@ export async function streamTextWithContext({
};
}

if (parsed.provider === "vertex") {
if (!vertexConfig)
throw new Error(
"Missing Vertex AI configuration. Set GOOGLE_CLOUD_PROJECT, GOOGLE_CLOUD_LOCATION, " +
"and VERTEX_AI_SERVICE_ACCOUNT_KEY (or GOOGLE_APPLICATION_CREDENTIALS).",
);
const model = resolveVertexModel({
modelId: parsed.model,
context,
});
const stream = streamSimple(model, context, {
...(typeof effectiveTemperature === "number" ? { temperature: effectiveTemperature } : {}),
...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}),
project: vertexConfig.project,
location: vertexConfig.location,
signal: controller.signal,
} as Record<string, unknown>);

const textStream: AsyncIterable<string> = {
async *[Symbol.asyncIterator]() {
for await (const event of stream) {
if (event.type === "text_delta") yield event.delta;
if (event.type === "error") {
lastError = event.error;
break;
}
}
},
};
return {
textStream: wrapTextStream(textStream),
canonicalModelId: parsed.canonical,
provider: parsed.provider,
usage: streamUsageWithTimeout({ result: stream.result(), timeoutMs }),
lastError: () => lastError,
};
}

if (parsed.provider === "anthropic") {
const apiKey = apiKeys.anthropicApiKey;
if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY for anthropic/... model");
Expand Down
2 changes: 1 addition & 1 deletion src/llm/html-to-markdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ export function createHtmlToMarkdownConverter({
}) => void;
onUsage?: (usage: {
model: string;
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia";
provider: "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "vertex";
usage: LlmTokenUsage | null;
}) => void;
}): ConvertHtmlToMarkdown {
Expand Down
6 changes: 3 additions & 3 deletions src/llm/model-id.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export type LlmProvider = "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia";
export type LlmProvider = "xai" | "openai" | "google" | "anthropic" | "zai" | "nvidia" | "vertex";

export type ParsedModelId = {
provider: LlmProvider;
Expand All @@ -12,7 +12,7 @@ export type ParsedModelId = {
canonical: string;
};

const PROVIDERS: LlmProvider[] = ["xai", "openai", "google", "anthropic", "zai", "nvidia"];
const PROVIDERS: LlmProvider[] = ["xai", "openai", "google", "anthropic", "zai", "nvidia", "vertex"];

/**
* Anthropic short model aliases that are NOT valid API model identifiers.
Expand Down Expand Up @@ -63,7 +63,7 @@ export function normalizeGatewayStyleModelId(raw: string): string {
const model = normalized.slice(slash + 1);
if (!PROVIDERS.includes(provider as LlmProvider)) {
throw new Error(
`Unsupported model provider "${provider}". Use xai/..., openai/..., google/..., anthropic/..., zai/..., or nvidia/...`,
`Unsupported model provider "${provider}". Use xai/..., openai/..., google/..., anthropic/..., zai/..., nvidia/..., or vertex/...`,
);
}
if (model.trim().length === 0) {
Expand Down
24 changes: 24 additions & 0 deletions src/llm/providers/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,30 @@ export function resolveGoogleModel({
);
}

/**
 * Resolve the model descriptor used for a `vertex/...` model id.
 *
 * The lookup is made against the "google" provider registry; when no entry is
 * found, a synthetic "google" model is built instead. In both cases the
 * returned descriptor has its `api` field set to "google-vertex".
 *
 * @param modelId - Model id without the provider prefix.
 * @param context - Conversation context; passed to `wantsImages` to decide
 *   whether a synthetic fallback model should allow image input.
 * @returns A copy of the registered or synthetic model with `api` overridden.
 */
export function resolveVertexModel({
  modelId,
  context,
}: {
  modelId: string;
  context: Context;
}): Model<Api> {
  const imagesRequested = wantsImages(context);

  // Vertex AI reuses the google model ids (e.g. gemini-3-flash-preview), so the
  // "google" registry is consulted; only the dispatch API differs
  // (google-vertex, which authenticates via ADC).
  const registered = tryGetModel("google", modelId);

  // NOTE(review): the us-central1 base URL below applies only to the synthetic
  // fallback; a registry hit keeps whatever baseUrl the "google" entry carries —
  // confirm that is what the google-vertex dispatcher expects.
  const resolved =
    registered ??
    createSyntheticModel({
      provider: "google",
      modelId,
      api: "google-vertex",
      baseUrl: "https://us-central1-aiplatform.googleapis.com",
      allowImages: imagesRequested,
    });

  // Override `api` last so registry entries are also routed through google-vertex.
  return { ...resolved, api: "google-vertex" as Api };
}

export function resolveAnthropicModel({
modelId,
context,
Expand Down
Loading