diff --git a/core/llm/llms/Auxen.ts b/core/llm/llms/Auxen.ts
new file mode 100644
index 00000000000..867a21de3c6
--- /dev/null
+++ b/core/llm/llms/Auxen.ts
@@ -0,0 +1,24 @@
+import OpenAI from "./OpenAI.js";
+
+import type { LLMOptions } from "../../index.js";
+
+/**
+ * Auxen — dedicated, OpenAI-compatible LLM endpoints (https://auxen.ai).
+ *
+ * Each Auxen instance is a per-customer dedicated GPU running one open-source
+ * model behind a stable HTTPS endpoint of the form
+ *   https://api.auxen.ai/v1/inst_xxx/v1
+ * authenticated with a per-instance `auxk_*` bearer token.
+ *
+ * Because the apiBase is per-instance, no fixed defaultOptions.apiBase is
+ * provided — users must set `apiBase` in their config to the URL issued by
+ * the Auxen dashboard.
+ */
+class Auxen extends OpenAI {
+  static providerName = "auxen";
+  static defaultOptions: Partial<LLMOptions> = {
+    // apiBase is per-instance — user must provide it via config.
+  };
+}
+
+export default Auxen;
diff --git a/core/llm/llms/index.ts b/core/llm/llms/index.ts
index 453b2d90cd8..2bf805995ed 100644
--- a/core/llm/llms/index.ts
+++ b/core/llm/llms/index.ts
@@ -11,6 +11,7 @@ import { renderTemplatedString } from "../../util/handlebars/renderTemplatedStri
 import { BaseLLM } from "../index";
 import Anthropic from "./Anthropic";
 import Asksage from "./Asksage";
+import Auxen from "./Auxen";
 import Azure from "./Azure";
 import Bedrock from "./Bedrock";
 import BedrockImport from "./BedrockImport";
@@ -120,6 +121,7 @@ export const LLMClasses = [
   TestLLM,
   Cerebras,
   Asksage,
+  Auxen,
   Nebius,
   Nous,
   Venice,
diff --git a/docs/customize/model-providers/more/auxen.mdx b/docs/customize/model-providers/more/auxen.mdx
new file mode 100644
index 00000000000..b17692f0be7
--- /dev/null
+++ b/docs/customize/model-providers/more/auxen.mdx
@@ -0,0 +1,62 @@
+---
+title: Auxen
+slug: ../auxen
+---
+
+[Auxen](https://auxen.ai) hosts per-customer **dedicated** LLM endpoints (Llama 3.1/3.2, Qwen 2.5, Mistral, Gemma 2, Mixtral, Phi-3, Command R) on
stable HTTPS URLs with an OpenAI-compatible `/v1/chat/completions` API. Each Auxen instance is a dedicated GPU billed per-minute of runtime, not per token. + + + Provision an instance from the [Auxen dashboard](https://auxen.ai). You will be issued two values: + + - A per-instance **base URL** of the form `https://api.auxen.ai/v1/inst_xxx/v1` + - A per-instance **API key** prefixed `auxk_` + + +## Configuration + + + + ```yaml title="config.yaml" + name: My Auxen Config + version: 0.0.1 + schema: v1 + + models: + - name: Auxen Llama 3.1 8B + provider: auxen + model: llama-3.1-8b + apiBase: https://api.auxen.ai/v1/inst_xxx/v1 + apiKey: auxk_... + ``` + + + ```json title="config.json" + { + "models": [ + { + "title": "Auxen Llama 3.1 8B", + "provider": "auxen", + "model": "llama-3.1-8b", + "apiBase": "https://api.auxen.ai/v1/inst_xxx/v1", + "apiKey": "auxk_..." + } + ] + } + ``` + + + +The `model` field should match the model your Auxen instance is serving. Each Auxen instance is provisioned with one model at creation time. + +## Catalog + +Auxen-hosted models: + +- `llama-3.1-8b`, `llama-3.1-70b`, `llama-3.2-3b` +- `qwen2.5-7b`, `qwen2.5-14b`, `qwen2.5-32b` +- `mistral-7b`, `mistral-nemo-12b`, `mixtral-8x7b` +- `gemma2-9b`, `phi-3-mini`, `command-r-7b` + +## Pricing + +Auxen bills per-minute of dedicated GPU runtime, not per token. See [auxen.ai/pricing](https://auxen.ai/pricing) for hourly rates by model size. diff --git a/extensions/vscode/config_schema.json b/extensions/vscode/config_schema.json index 6eb11282100..d7577a766a5 100644 --- a/extensions/vscode/config_schema.json +++ b/extensions/vscode/config_schema.json @@ -236,7 +236,8 @@ "ovhcloud", "venice", "inception", - "tars" + "tars", + "auxen" ], "markdownEnumDescriptions": [ "### OpenAI\nUse gpt-4, gpt-3.5-turbo, or any other OpenAI model. 
See [here](https://openai.com/product#made-for-developers) to obtain an API key.\n\n> [Reference](https://docs.continue.dev/reference/Model%20Providers/openai)", @@ -289,7 +290,8 @@ "### OVHcloud AI Endpoints is a serverless inference API that provides access to a curated selection of models (e.g., Llama, Mistral, Qwen, Deepseek). It is designed with security and data privacy in mind and is compliant with GDPR. To get started, create an API key on the OVHcloud [AI Endpoints website](https://endpoints.ai.cloud.ovh.net/). For more information, including pricing, visit the OVHcloud [AI Endpoints product page](https://www.ovhcloud.com/en/public-cloud/ai-endpoints/).", "### Venice\n Venice.AI is a privacy-focused generative AI platform, allowing users to interact with open-source LLMs without storing any private user data.\nHosted models support the OpenAI API standard, providing seamless integration for users seeking privacy and flexibility.\nTo get started with the Venice API, either purchase a pro account, stake $VVV for daily inference allotments, or fund your account with USD.\nVisit the [API settings page](https://venice.ai/settings/api) or learn more at the [Venice API documentation](https://venice.ai/api).", "### Inception\n Inception Labs offer a new generation of diffusion-based LLMs.\nVisit the [API settings page](https://platform.inceptionlabs.ai/) or learn more at the [Inception docs](https://platform.inceptionlabs.ai/docs).", - "### TARS\nTARS is an OpenAI-compatible proxy router. To get started, obtain an API key and configure the provider in your config.json." + "### TARS\nTARS is an OpenAI-compatible proxy router. To get started, obtain an API key and configure the provider in your config.json.", + "### Auxen\n[Auxen](https://auxen.ai) hosts per-customer dedicated LLM endpoints (Llama, Qwen, Mistral, Gemma, Mixtral, Phi, Command R) on a per-instance HTTPS URL with an OpenAI-compatible API, billed per-minute of dedicated GPU runtime. 
Provision an instance from the [Auxen dashboard](https://auxen.ai) to obtain your per-instance `apiBase` (e.g. `https://api.auxen.ai/v1/inst_xxx/v1`) and `auxk_*` API key." ], "type": "string" }, @@ -536,7 +538,8 @@ "kindo", "scaleway", "ovhcloud", - "venice" + "venice", + "auxen" ] } }, diff --git a/packages/openai-adapters/src/index.ts b/packages/openai-adapters/src/index.ts index c9eb4da00fa..c6afa876dc8 100644 --- a/packages/openai-adapters/src/index.ts +++ b/packages/openai-adapters/src/index.ts @@ -166,6 +166,12 @@ export function constructLlmApi(config: LLMConfig): BaseLlmApi | undefined { return openAICompatible("https://api.scaleway.ai/v1/", config); case "fireworks": return openAICompatible("https://api.fireworks.ai/inference/v1/", config); + case "auxen": + // Auxen instance URLs are per-customer. The config.apiBase the user + // sets on their config (issued by the Auxen dashboard) is the source + // of truth; fall back to the marketing root only so the type-system + // is satisfied. + return openAICompatible("https://api.auxen.ai/v1/", config); case "together": return openAICompatible("https://api.together.xyz/v1/", config); case "ncompass": diff --git a/packages/openai-adapters/src/types.ts b/packages/openai-adapters/src/types.ts index 3b324b0ac6b..9363fea9c1f 100644 --- a/packages/openai-adapters/src/types.ts +++ b/packages/openai-adapters/src/types.ts @@ -33,6 +33,7 @@ export const OpenAIConfigSchema = BasePlusConfig.extend({ useResponsesApi: z.boolean().optional(), provider: z.union([ z.literal("openai"), + z.literal("auxen"), z.literal("mistral"), z.literal("voyage"), z.literal("deepinfra"),