diff --git a/.claude/commands/test.md b/.claude/commands/test.md index 813fdb90..1a2572c6 100644 --- a/.claude/commands/test.md +++ b/.claude/commands/test.md @@ -23,7 +23,7 @@ Run a test from the testing framework to validate documentation quality. | Category | Tests | Description | |----------|-------|-------------| -| `smoke` | 12 | Fast tests, no GPU deploys | +| `smoke` | 13 | Fast tests, no GPU deploys | | `flash` | 13 | Flash SDK tests | | `serverless` | 20 | Serverless endpoint tests | | `vllm` | 6 | vLLM deployment tests | @@ -32,7 +32,7 @@ Run a test from the testing framework to validate documentation quality. | `templates` | 6 | Template tests | | `clusters` | 4 | Instant Cluster tests | | `sdk` | 8 | SDK and API tests | -| `cli` | 6 | runpodctl tests | +| `cli` | 16 | runpodctl tests | | `integrations` | 4 | Third-party integrations | | `public` | 3 | Public endpoint tests | | `tutorials` | 9 | End-to-end tutorials | diff --git a/docs.json b/docs.json index 52d0260c..846d6426 100644 --- a/docs.json +++ b/docs.json @@ -86,6 +86,7 @@ "pages": [ "serverless/overview", "serverless/quickstart", + "serverless/build-worker", "serverless/sdks", "serverless/pricing", { @@ -150,7 +151,6 @@ "group": "vLLM", "pages": [ "serverless/vllm/overview", - "serverless/vllm/get-started", "serverless/vllm/configuration", "serverless/vllm/vllm-requests", "serverless/vllm/openai-compatibility", @@ -411,19 +411,21 @@ "group": "Runpod CLI", "pages": [ "runpodctl/overview", + "runpodctl/reference/runpodctl-doctor", + "runpodctl/reference/runpodctl-pod", + "runpodctl/reference/runpodctl-serverless", + "runpodctl/reference/runpodctl-template", + "runpodctl/reference/runpodctl-hub", + "runpodctl/reference/runpodctl-network-volume", + "runpodctl/reference/runpodctl-registry", + "runpodctl/reference/runpodctl-gpu", + "runpodctl/reference/runpodctl-datacenter", + "runpodctl/reference/runpodctl-billing", + "runpodctl/reference/runpodctl-user", + "runpodctl/reference/runpodctl-ssh", "runpodctl/reference/runpodctl-config", - "runpodctl/reference/runpodctl-create-pod", - "runpodctl/reference/runpodctl-create-pods", - "runpodctl/reference/runpodctl-get-cloud", - "runpodctl/reference/runpodctl-get-pod", - "runpodctl/reference/runpodctl-receive", - "runpodctl/reference/runpodctl-remove-pod", - "runpodctl/reference/runpodctl-remove-pods", "runpodctl/reference/runpodctl-send", - "runpodctl/reference/runpodctl-ssh-add-key", - "runpodctl/reference/runpodctl-ssh-list-keys", - "runpodctl/reference/runpodctl-start-pod", - "runpodctl/reference/runpodctl-stop-pod", + "runpodctl/reference/runpodctl-receive", "runpodctl/reference/runpodctl-update", "runpodctl/reference/runpodctl-version" ] @@ -612,13 +614,17 @@ } }, "redirects": [ + { + "source": "/serverless/vllm/get-started", + "destination": "/serverless/quickstart" + }, { "source": "/tutorials/pods/fine-tune-llm-axolotl", "destination": "/fine-tune" }, { "source": "/serverless/workers/custom-worker", - "destination": "/serverless/quickstart" + "destination": "/serverless/build-worker" }, { "source": "/runpodctl/install-runpodctl", @@ -876,6 +882,46 @@ "source": "/runpodctl/reference/runpodctl", "destination": "/runpodctl/overview" }, + { + "source": "/runpodctl/reference/runpodctl-create-pod", + "destination": "/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-create-pods", + "destination": "/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-get-pod", + "destination": 
"/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-get-cloud", + "destination": "/runpodctl/reference/runpodctl-datacenter" + }, + { + "source": "/runpodctl/reference/runpodctl-remove-pod", + "destination": "/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-remove-pods", + "destination": "/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-start-pod", + "destination": "/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-stop-pod", + "destination": "/runpodctl/reference/runpodctl-pod" + }, + { + "source": "/runpodctl/reference/runpodctl-ssh-add-key", + "destination": "/runpodctl/reference/runpodctl-ssh" + }, + { + "source": "/runpodctl/reference/runpodctl-ssh-list-keys", + "destination": "/runpodctl/reference/runpodctl-ssh" + }, { "source": "/hub/public-endpoints", "destination": "/public-endpoints/overview" diff --git a/get-started.mdx b/get-started.mdx index b4549a2f..aa7e3789 100644 --- a/get-started.mdx +++ b/get-started.mdx @@ -38,25 +38,23 @@ If you haven't set up payments yet, you'll be prompted to add a payment method a -First, [create an API key](/get-started/api-keys) if you haven't already. Export it as an environment variable: +First, install the [Runpod CLI](/runpodctl/overview) on your local machine and configure it with your API key: ```bash -export RUNPOD_API_KEY="your-api-key" +# Install runpodctl +bash <(wget -qO- cli.runpod.io) + +# Configure your API key +runpodctl config --apiKey "your-api-key" ``` Then deploy your Pod: ```bash -curl --request POST \ - --url https://rest.runpod.io/v1/pods \ - --header "Authorization: Bearer $RUNPOD_API_KEY" \ - --header "Content-Type: application/json" \ - --data '{ - "name": "quickstart-pod", - "imageName": "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04", - "gpuTypeIds": ["NVIDIA A40"], - "gpuCount": 1 - }' +runpodctl pod create \ + --name "quickstart-pod" \ + --gpu-id "NVIDIA A40" \ + --image "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04" ``` The response includes your Pod ID: @@ -96,31 +94,24 @@ Once your Pod finishes initializing, connect and run some code: -You'll need an [SSH key added to your account](/pods/configuration/use-ssh) for this to work. +You'll need an [SSH key added to your account](/pods/configuration/use-ssh) for this to work. You can run `runpodctl doctor` to set this up automatically. -Get your Pod's SSH connection details: +Get the SSH command for your Pod: ```bash -curl --request GET \ - --url "https://rest.runpod.io/v1/pods/$RUNPOD_POD_ID" \ - --header "Authorization: Bearer $RUNPOD_API_KEY" +runpodctl ssh info $RUNPOD_POD_ID ``` -The response includes `publicIp` and `portMappings`: +The output includes the SSH command to connect: ```json { - "id": "uv9wy55tyv30lo", - "publicIp": "194.68.245.207", - "portMappings": { - "22": 22100 - }, - ... + "sshCommand": "ssh root@194.68.245.207 -p 22100 -i ~/.ssh/id_ed25519" } ``` -Use these values to connect via SSH: +Use this command to connect and run code: ```bash ssh root@194.68.245.207 -p 22100 @@ -165,17 +156,13 @@ To terminate your Pod: Stop your Pod: ```bash -curl --request POST \ - --url "https://rest.runpod.io/v1/pods/$RUNPOD_POD_ID/stop" \ - --header "Authorization: Bearer $RUNPOD_API_KEY" +runpodctl pod stop $RUNPOD_POD_ID ``` You'll still be charged a small amount for storage on stopped Pods (\$0.20 per GB per month). 
If you don't need to retain any data on your Pod, terminate it completely:

```bash
-curl --request DELETE \
-  --url "https://rest.runpod.io/v1/pods/$RUNPOD_POD_ID" \
-  --header "Authorization: Bearer $RUNPOD_API_KEY"
+runpodctl pod delete $RUNPOD_POD_ID
```

diff --git a/hub/overview.mdx b/hub/overview.mdx
index 717a94cd..ab0ff03e 100644
--- a/hub/overview.mdx
+++ b/hub/overview.mdx
@@ -68,7 +68,7 @@ The Hub also offers public endpoints for popular AI models. These are

Publish your GitHub repository on the Hub by preparing a worker with a handler and `Dockerfile`.

-New to building Serverless workers? Follow the [quickstart guide](/serverless/quickstart).
+New to building Serverless workers? Follow the [build a custom worker](/serverless/build-worker) guide.

1. Add configuration files in a `.runpod` directory per the [publishing guide](/hub/publishing-guide).

diff --git a/integrations/n8n-integration.mdx b/integrations/n8n-integration.mdx
index 4a029c7e..4560d508 100644
--- a/integrations/n8n-integration.mdx
+++ b/integrations/n8n-integration.mdx
@@ -46,7 +46,7 @@ First, you'll deploy a vLLM worker to serve the `Qwen/qwen3-32b-awq` model.

-  For more details on vLLM deployment options, see [Deploy a vLLM worker](/serverless/vllm/get-started).
+  For more details on vLLM deployment options, see [Configure vLLM](/serverless/vllm/configuration).

In the deployment modal:

diff --git a/integrations/overview.mdx b/integrations/overview.mdx
index 0191a451..abbcbfaf 100644
--- a/integrations/overview.mdx
+++ b/integrations/overview.mdx
@@ -21,7 +21,7 @@ To integrate with Serverless:

5. Start [sending requests](/serverless/endpoints/send-requests) to the endpoint.

-For a full walkthrough of how to create and test custom endpoints, try the [Serverless quickstart](/serverless/quickstart).
+For a full walkthrough of how to create and test custom endpoints, try the [build a custom worker](/serverless/build-worker) guide.

## Integrate with Pods

diff --git a/overview.mdx b/overview.mdx
index 2c11bc39..8a7c3ee4 100644
--- a/overview.mdx
+++ b/overview.mdx
@@ -98,7 +98,7 @@ For a list of available models, see the [model reference](/public-endpoints/refe

Create a REST API with automatic load balancing using Flash.

-
+

Deploy a large language model in minutes using vLLM on Serverless.

diff --git a/pods/connect-to-a-pod.mdx b/pods/connect-to-a-pod.mdx
index a537d7bd..203a1f46 100644
--- a/pods/connect-to-a-pod.mdx
+++ b/pods/connect-to-a-pod.mdx
@@ -50,14 +50,14 @@ Interactive web environment for code, files, and data analysis. Available on tem

Create a Pod with JupyterLab access using the CLI:

```bash
-runpodctl create pod \
+runpodctl pod create \
  --name my-jupyter-pod \
-  --gpuType "NVIDIA GeForce RTX 4090" \
-  --imageName "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04" \
-  --containerDiskSize 20 \
-  --volumeSize 50 \
+  --gpu-id "NVIDIA GeForce RTX 4090" \
+  --image "runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04" \
+  --container-disk-in-gb 20 \
+  --volume-in-gb 50 \
  --ports "8888/http" \
-  --env "JUPYTER_PASSWORD=your_secure_password"
+  --env '{"JUPYTER_PASSWORD":"your_secure_password"}'
```

After the Pod starts, access JupyterLab at `https://[POD_ID]-8888.proxy.runpod.net`.
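If you'd rather not copy the Pod ID from the console by hand, you can script the URL. A minimal sketch, assuming `runpodctl pod list` prints the Pod ID in the first column of its output:

```bash
# Look up the Pod ID by name and print the JupyterLab URL
POD_ID=$(runpodctl pod list | grep "my-jupyter-pod" | awk '{print $1}')
echo "https://${POD_ID}-8888.proxy.runpod.net"
```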
diff --git a/pods/manage-pods.mdx b/pods/manage-pods.mdx index 2172041a..e8939ed3 100644 --- a/pods/manage-pods.mdx +++ b/pods/manage-pods.mdx @@ -15,12 +15,12 @@ runpodctl config --apiKey RUNPOD_API_KEY | Action | Web UI | CLI | |--------|-----|-----| -| **Deploy** | [Pods page](https://www.console.runpod.io/pods) → Deploy | `runpodctl create pods --name NAME --gpuType "GPU" --imageName "IMAGE"` | -| **Start** | Expand Pod → Play icon | `runpodctl start pod POD_ID` | -| **Stop** | Expand Pod → Stop icon | `runpodctl stop pod POD_ID` | -| **Update** | Three-dot menu → Edit Pod | — | -| **Terminate** | Expand Pod → Trash icon | `runpodctl remove pod POD_ID` | -| **List** | [Pods page](https://www.console.runpod.io/pods) | `runpodctl get pod` | +| **Deploy** | [Pods page](https://www.console.runpod.io/pods) → Deploy | `runpodctl pod create --name NAME --gpu-id "GPU" --image "IMAGE"` | +| **Start** | Expand Pod → Play icon | `runpodctl pod start POD_ID` | +| **Stop** | Expand Pod → Stop icon | `runpodctl pod stop POD_ID` | +| **Update** | Three-dot menu → Edit Pod | `runpodctl pod update POD_ID` | +| **Terminate** | Expand Pod → Trash icon | `runpodctl pod delete POD_ID` | +| **List** | [Pods page](https://www.console.runpod.io/pods) | `runpodctl pod list` | ## Deploy a Pod @@ -48,12 +48,12 @@ Deploy preconfigured Pods from the [Runpod Hub](/hub/overview#deploy-as-a-pod) f ```sh -runpodctl create pods \ +runpodctl pod create \ --name hello-world \ - --gpuType "NVIDIA A40" \ - --imageName "runpod/pytorch:3.10-2.0.0-117" \ - --containerDiskSize 10 \ - --volumeSize 100 + --gpu-id "NVIDIA A40" \ + --image "runpod/pytorch:3.10-2.0.0-117" \ + --container-disk-in-gb 10 \ + --volume-in-gb 100 ``` @@ -118,12 +118,12 @@ Pods with [network volumes](/storage/network-volumes) attached cannot be stopped ```sh -runpodctl stop pod $RUNPOD_POD_ID +runpodctl pod stop $RUNPOD_POD_ID ``` **Schedule a stop** (e.g., after 2 hours): ```sh -sleep 2h; runpodctl stop pod $RUNPOD_POD_ID & +sleep 2h; runpodctl pod stop $RUNPOD_POD_ID & ``` @@ -154,7 +154,7 @@ Resume a stopped Pod. Note: You may be allocated [zero GPUs](/references/trouble ```sh -runpodctl start pod $RUNPOD_POD_ID +runpodctl pod start $RUNPOD_POD_ID ``` @@ -224,11 +224,7 @@ Terminating permanently deletes all data not stored in a [network volume](/stora ```sh -# Single Pod -runpodctl remove pod $RUNPOD_POD_ID - -# Bulk remove by name -runpodctl remove pods my-bulk-task --podCount 40 +runpodctl pod delete $RUNPOD_POD_ID ``` diff --git a/runpodctl/manage-pods.mdx b/runpodctl/manage-pods.mdx deleted file mode 100644 index e69de29b..00000000 diff --git a/runpodctl/overview.mdx b/runpodctl/overview.mdx index 0b64eb41..8fa4d651 100644 --- a/runpodctl/overview.mdx +++ b/runpodctl/overview.mdx @@ -1,66 +1,136 @@ --- title: "Overview" sidebarTitle: "Overview" -description: "Use Runpod CLI to manage Pods from your local machine." +description: "Use Runpod CLI to manage Pods, Serverless endpoints, templates, and more from your local machine." --- import { PodsTooltip, PodTooltip } from "/snippets/tooltips.jsx"; -Runpod CLI is an [open source](https://github.com/runpod/runpodctl) command-line interface tool for managing your Runpod resources remotely from your local machine. You can transfer files and data between your local system and Runpod, execute code on remote , and automate Pod deployment workflows. +Runpod CLI is an [open source](https://github.com/runpod/runpodctl) command-line tool for managing your Runpod resources from your local machine. 
You can manage Pods, Serverless endpoints, templates, network volumes, and models, transfer files between your system and Runpod, diagnose issues, and view account information.

-## Install Runpod CLI locally
+## Quick start
+
+After installing and configuring `runpodctl`, you can start managing resources immediately:

-Every Pod you deploy comes preinstalled with the `runpodctl` command and a Pod-scoped API key. You can also install it on your local machine to manage your Pods remotely from your own system.
+```bash
+runpodctl doctor # First time setup (API key + SSH)
+runpodctl gpu list # See available GPUs
+runpodctl template search pytorch # Find a template
+runpodctl pod create --template-id runpod-torch-v21 --gpu-id "NVIDIA GeForce RTX 4090"
+runpodctl pod list # List your Pods
+```

-To install Runpod CLI locally, follow these steps:
+## Install Runpod CLI locally
+
+Every Pod you deploy comes preinstalled with `runpodctl` and a Pod-scoped API key. You can also install it on your local machine to manage resources remotely.

### Step 1: Choose an installation method

Choose the installation method that matches your operating system.

+
+The install script automatically detects your operating system and architecture:
+
+```bash
+bash <(wget -qO- cli.runpod.io)
+```
+
+Or using curl:
+
+```bash
+bash <(curl -sL cli.runpod.io)
+```
+
+The script installs the latest version to `/usr/local/bin` when run as root, or to `~/.local/bin` for non-root users. If you don't have root access, the script displays instructions for adding `runpodctl` to your PATH.
+

**Homebrew:**

-```sh
+```bash
brew install runpod/runpodctl/runpodctl
```

**ARM (Apple Silicon):**

-```sh
-wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/download/v1.14.3/runpodctl-darwin-arm64 -O runpodctl && chmod +x runpodctl && sudo mv runpodctl /usr/local/bin/runpodctl
+```bash
+wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-darwin-arm64 -O runpodctl && chmod +x runpodctl && sudo mv runpodctl /usr/local/bin/runpodctl
```

**AMD (Intel):**

-```sh
-wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/download/v1.14.3/runpodctl-darwin-amd64 -O runpodctl && chmod +x runpodctl && sudo mv runpodctl /usr/local/bin/runpodctl
+```bash
+wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-darwin-amd64 -O runpodctl && chmod +x runpodctl && sudo mv runpodctl /usr/local/bin/runpodctl
```

-```sh
-wget --quiet --show-progress https://github.com/Run-Pod/runpodctl/releases/download/v1.14.3/runpodctl-linux-amd64 -O runpodctl && chmod +x runpodctl && sudo cp runpodctl /usr/bin/runpodctl
+
+**AMD64 (x86_64):**
+```bash
+wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-linux-amd64 -O runpodctl && chmod +x runpodctl && sudo cp runpodctl /usr/bin/runpodctl
+```
+
+**ARM64 (aarch64):**
+```bash
+wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-linux-arm64 -O runpodctl && chmod +x runpodctl && sudo cp runpodctl /usr/bin/runpodctl
+```
+
+**Non-root installation:**
+
+If you don't have root access, install to your user directory:
+
+```bash
+mkdir -p ~/.local/bin && wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-linux-amd64 -O ~/.local/bin/runpodctl && chmod +x ~/.local/bin/runpodctl
+```
+
+Then add `~/.local/bin` to your PATH by adding this line to your `~/.bashrc` or
`~/.zshrc`: + +```bash +export PATH="$HOME/.local/bin:$PATH" ``` -```sh -wget https://github.com/runpod/runpodctl/releases/download/v1.14.3/runpodctl-windows-amd64.exe -O runpodctl.exe +```bash +wget https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-windows-amd64.exe -O runpodctl.exe ``` -```sh -!wget --quiet --show-progress https://github.com/Run-Pod/runpodctl/releases/download/v1.14.3/runpodctl-linux-amd -O runpodctl +```bash +!wget --quiet --show-progress https://github.com/runpod/runpodctl/releases/latest/download/runpodctl-linux-amd64 -O runpodctl !chmod +x runpodctl !cp runpodctl /usr/bin/runpodctl ``` + + +Pre-built binaries are available on [conda-forge](https://anaconda.org/conda-forge/runpodctl) for Linux (x86_64, aarch64, ppc64le), macOS (x86_64, arm64), and Windows (x86_64). + +**conda:** +```bash +conda install conda-forge::runpodctl +``` + +**mamba:** +```bash +mamba install conda-forge::runpodctl +``` + +**pixi:** +```bash +pixi global install runpodctl +``` + + + This installs `runpodctl` globally on your system, so you can run commands from any directory. @@ -69,9 +139,17 @@ This installs `runpodctl` globally on your system, so you can run commands from Before you can use `runpodctl` locally, you must configure it with an [API key](/get-started/api-keys). -Run the following command to add your API key to `runpodctl`, replacing `YOUR_API_KEY` with your API key: +The easiest way to set up your API key and SSH configuration is with the `doctor` command: + +```bash +runpodctl doctor +``` + +This command guides you through first-time setup, including API key configuration and SSH key setup. + +Alternatively, you can manually configure your API key: -```sh +```bash runpodctl config --apiKey YOUR_API_KEY ``` @@ -84,25 +162,55 @@ saved apiKey into config file: /Users/runpod/.runpod/config.toml To verify that `runpodctl` installed successfully, run this command: -```sh +```bash runpodctl version ``` You should see which version is installed: -```sh -runpodctl v1.14.4 +```bash +runpodctl v2.0 ``` +## Command groups + +Runpod CLI organizes commands into groups based on the resource type: + +| Command | Alias | Description | +|---------|-------|-------------| +| `runpodctl pod` | | Manage Pods (create, list, start, stop, delete) | +| `runpodctl serverless` | `sls` | Manage Serverless endpoints | +| `runpodctl template` | `tpl` | List, search, and manage templates | +| `runpodctl hub` | | Browse and deploy from the Runpod Hub | +| `runpodctl network-volume` | `nv` | Manage network volumes | +| `runpodctl registry` | `reg` | Manage container registry authentications | +| `runpodctl gpu` | | List available GPUs | +| `runpodctl datacenter` | `dc` | List datacenters | +| `runpodctl billing` | | View billing history | +| `runpodctl user` | `me` | View account information | +| `runpodctl ssh` | | Manage SSH keys and get connection info | + ## Help and reference Learn how to use Runpod CLI commands by browsing the CLI reference using the sidebar to the left, or by running the `help` command: -```sh +```bash runpodctl help ``` Learn more about a particular command by running: -```sh -runpodctl [command] help -``` \ No newline at end of file +```bash +runpodctl [command] --help +``` + +## Shell completion + +Enable tab completion for your shell to make working with `runpodctl` easier: + +```bash +runpodctl completion +``` + +This command auto-detects your shell and adds the appropriate source command to your shell configuration file (`~/.bashrc` or `~/.zshrc`). 
The command is idempotent—running it again skips installation if completion is already configured. + +Restart your shell or source the configuration file for changes to take effect. diff --git a/runpodctl/reference/runpodctl-billing.mdx b/runpodctl/reference/runpodctl-billing.mdx new file mode 100644 index 00000000..466399ba --- /dev/null +++ b/runpodctl/reference/runpodctl-billing.mdx @@ -0,0 +1,108 @@ +--- +title: "billing" +sidebarTitle: "billing" +--- + +View billing history for Pods, Serverless endpoints, and network volumes. + + +```bash Command +runpodctl billing [flags] +``` + + +## Subcommands + +### View Pod billing + +View billing history for Pods: + +```bash +runpodctl billing pods +``` + +#### Pod billing flags + + +Time bucket size (`hour`, `day`, `week`, `month`, `year`). + + + +Start time in RFC3339 format (e.g., `2024-01-01T00:00:00Z`). + + + +End time in RFC3339 format. + + + +Group results by `podId` or `gpuId`. + + + +Filter by specific Pod ID. + + + +Filter by specific GPU type. + + +### View Serverless billing + +View billing history for Serverless endpoints: + +```bash +runpodctl billing serverless +``` + +#### Serverless billing flags + + +Time bucket size (`hour`, `day`, `week`, `month`, `year`). + + + +Start time in RFC3339 format. + + + +End time in RFC3339 format. + + + +Group results by `endpointId`, `podId`, or `gpuId`. + + + +Filter by specific endpoint ID. + + + +Filter by specific GPU type. + + +### View network volume billing + +View billing history for network volumes: + +```bash +runpodctl billing network-volume +``` + +#### Network volume billing flags + + +Time bucket size (`hour`, `day`, `week`, `month`, `year`). + + + +Start time in RFC3339 format. + + + +End time in RFC3339 format. + + +## Related commands + +- [`runpodctl user`](/runpodctl/reference/runpodctl-user) diff --git a/runpodctl/reference/runpodctl-config.mdx b/runpodctl/reference/runpodctl-config.mdx index 2c383e3f..c4644628 100644 --- a/runpodctl/reference/runpodctl-config.mdx +++ b/runpodctl/reference/runpodctl-config.mdx @@ -6,7 +6,7 @@ sidebarTitle: "config" Configure the Runpod CLI with your API credentials and API URL to enable programmatic access to your Runpod resources. -```sh Command +```bash Command runpodctl config [flags] ``` @@ -15,11 +15,16 @@ runpodctl config [flags] Configure the CLI with your API key: -```sh -runpodctl config \ - --apiKey "rpaPOIUYYULKDSALVIUT3Q2ZRKZ98IUYTSK2OQQ2CWQxkd01" +```bash +runpodctl config --apiKey "your-api-key-here" ``` + + +For first-time setup, we recommend using [`runpodctl doctor`](/runpodctl/reference/runpodctl-doctor) instead, which guides you through API key configuration and SSH key setup interactively. + + + ## Flags @@ -29,3 +34,8 @@ Your Runpod API key, which authenticates the CLI to access your account. You can The Runpod API endpoint URL. The default value should work for most users. + +## Related commands + +- [`runpodctl doctor`](/runpodctl/reference/runpodctl-doctor) +- [`runpodctl user`](/runpodctl/reference/runpodctl-user) diff --git a/runpodctl/reference/runpodctl-create-pod.mdx b/runpodctl/reference/runpodctl-create-pod.mdx deleted file mode 100644 index 359668da..00000000 --- a/runpodctl/reference/runpodctl-create-pod.mdx +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: "create pod" -sidebarTitle: "create pod" ---- - -Create and start a new Pod on Runpod with configuration options for GPU type, storage, networking, and cloud tier. 
- - -```sh Command -runpodctl create pod [flags] -``` - - -## Example - -Create a Pod with 2 RTX 4090 GPUs in the Secure Cloud with a custom container image: - -```sh -runpodctl create pod \ - --name "my-training-pod" \ - --gpuType "NVIDIA GeForce RTX 3090" \ - --gpuCount 2 \ - --secureCloud \ - --imageName "runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel" \ - --containerDiskSize 50 \ - --volumeSize 100 -``` - -## Flags - - -A custom name for your Pod to make it easy to identify and reference. - - - -The GPU type to use for the Pod (e.g., `NVIDIA GeForce RTX 4090`, `NVIDIA B200`, `NVIDIA L40S`). Use the GPU ID (long form) from the [GPU types reference](/references/gpu-types) table to specify the GPU type. - - - -The number of GPUs to allocate to the Pod. - - - -Create the Pod in the Secure Cloud tier, which offers enterprise-grade infrastructure with enhanced reliability. - - - -Create the Pod in the Community Cloud tier, which typically offers lower pricing with spot instance availability. - - - -The Docker container image to use for the Pod (e.g., `runpod/pytorch:latest`). - - - -The ID of a template to use for Pod configuration, which pre-defines the image and environment settings. - - - -The size of the container disk in gigabytes, used for temporary storage within the container. - - - -The size of the persistent volume in gigabytes, which retains data across Pod restarts. - - - -The mount path for the persistent volume inside the container. - - - -The ID of an existing [network volume](/storage/network-volumes) to attach to the Pod for shared storage across multiple Pods. - - - -The maximum price ceiling in dollars per hour. If not specified, the Pod will be created at the lowest available price. - - - -The minimum system memory required in gigabytes. - - - -The minimum number of vCPUs required for the Pod. - - - -Environment variables to set in the container. Specify multiple times for multiple variables (e.g., `--env KEY1=VALUE1 --env KEY2=VALUE2`). - - - -Additional arguments to pass to the container when it starts. - - - -Ports to expose from the container. Specify multiple times for multiple ports (e.g., `--ports 8888/http --ports 22/tcp`). You can expose up to 10 HTTP ports and multiple TCP ports. See [Expose ports](/pods/configuration/expose-ports) for details. - - -## Related commands - -- [`runpodctl create pods`](/runpodctl/reference/runpodctl-create-pods) diff --git a/runpodctl/reference/runpodctl-create-pods.mdx b/runpodctl/reference/runpodctl-create-pods.mdx deleted file mode 100644 index f5f4f292..00000000 --- a/runpodctl/reference/runpodctl-create-pods.mdx +++ /dev/null @@ -1,104 +0,0 @@ ---- -title: "create pods" -sidebarTitle: "create pods" ---- - -Create multiple Pods at once with identical configurations (useful for parallel workloads or distributed training). - - -```sh Command -runpodctl create pods [flags] -``` - - -## Example - -Create 3 identical Pods with the name "training-worker" in the Secure Cloud: - -```sh -runpodctl create pods \ - --name "training-worker" \ - --podCount 3 \ - --gpuType "NVIDIA GeForce RTX 3090" \ - --gpuCount 1 \ - --secureCloud \ - --imageName "runpod/pytorch:2.0.1-py3.10-cuda11.8.0-devel" -``` - -## Flags - - -A custom name for the Pods. All Pods in the group will share this base name. - - - -The number of Pods to create. - - - -The GPU type to use for the Pods (e.g., `NVIDIA GeForce RTX 4090`, `NVIDIA B200`, `NVIDIA L40S`). Use the GPU ID (long form) from the [GPU types reference](/references/gpu-types) table to specify the GPU type. 
-The number of GPUs to allocate to each Pod.
-Create the Pods in the Secure Cloud tier, which offers enterprise-grade infrastructure with enhanced reliability.
-Create the Pods in the Community Cloud tier, which typically offers lower pricing with spot instance availability.
-The Docker container image to use for the Pods (e.g., `runpod/pytorch:latest`).
-The ID of a template to use for Pod configuration, which pre-defines the image and environment settings.
-The size of the container disk in gigabytes for each Pod.
-The size of the persistent volume in gigabytes for each Pod.
-The mount path for the persistent volume inside each container.
-The ID of an existing network volume to attach to all Pods for shared storage.
-The maximum price ceiling in dollars per hour. If not specified, Pods will be created at the lowest available price.
-The minimum system memory required in gigabytes for each Pod.
-The minimum number of vCPUs required for each Pod.
-Environment variables to set in the containers. Specify multiple times for multiple variables.
-Additional arguments to pass to the containers when they start.
-Ports to expose from the containers. Maximum of 1 HTTP port and 1 TCP port allowed per Pod.
-
-## Related commands
-
-- [`runpodctl create pod`](/runpodctl/reference/runpodctl-create-pod)

diff --git a/runpodctl/reference/runpodctl-datacenter.mdx b/runpodctl/reference/runpodctl-datacenter.mdx
new file mode 100644
index 00000000..d74026f8
--- /dev/null
+++ b/runpodctl/reference/runpodctl-datacenter.mdx
@@ -0,0 +1,79 @@
+---
+title: "datacenter"
+sidebarTitle: "datacenter"
+---
+
+List available datacenters and their locations.
+
+```bash Command
+runpodctl datacenter [flags]
+```
+
+## Alias
+
+You can use `dc` as a shorthand for `datacenter`:
+
+```bash
+runpodctl dc list
+```
+
+## Subcommands
+
+### List datacenters
+
+List all available datacenters with GPU availability:
+
+```bash
+runpodctl datacenter list
+```
+
+## Example output
+
+```json
+[
+  {
+    "gpuAvailability": [
+      {
+        "displayName": "RTX 4090",
+        "gpuId": "NVIDIA GeForce RTX 4090",
+        "stockStatus": "High"
+      }
+    ],
+    "id": "US-GA-1",
+    "location": "United States",
+    "name": "US-GA-1"
+  },
+  {
+    "gpuAvailability": [
+      {
+        "displayName": "A100 PCIe",
+        "gpuId": "NVIDIA A100 80GB PCIe",
+        "stockStatus": "High"
+      }
+    ],
+    "id": "EU-RO-1",
+    "location": "Europe",
+    "name": "EU-RO-1"
+  }
+]
+```
+
+## Using datacenter IDs
+
+When creating network volumes or specifying preferred datacenters for Pods, use the datacenter ID:
+
+```bash
+# Create a network volume in a specific datacenter
+runpodctl network-volume create --name "my-volume" --size 100 --data-center-id "US-GA-1"
+
+# Create a Pod with preferred datacenter
+runpodctl pod create --template-id runpod-torch-v21 --gpu-id "NVIDIA GeForce RTX 4090" --data-center-ids "US-GA-1"
+```
+
+## Related commands
+
+- [`runpodctl network-volume create`](/runpodctl/reference/runpodctl-network-volume)
+- [`runpodctl pod create`](/runpodctl/reference/runpodctl-pod)
+- [`runpodctl gpu list`](/runpodctl/reference/runpodctl-gpu)

diff --git a/runpodctl/reference/runpodctl-doctor.mdx b/runpodctl/reference/runpodctl-doctor.mdx
new file mode 100644
index 00000000..5fc5755f
--- /dev/null
+++ b/runpodctl/reference/runpodctl-doctor.mdx
@@ -0,0 +1,37 @@
+---
+title: "doctor"
+sidebarTitle: "doctor"
+---
+
+Diagnose and fix CLI issues, including first-time setup for API keys and SSH configuration.
+ + +```bash Command +runpodctl doctor +``` + + +## Description + +The `doctor` command is the recommended way to set up `runpodctl` for the first time. It guides you through: + +- Configuring your API key +- Setting up SSH keys for Pod access +- Verifying your configuration + +If you encounter issues with the CLI, running `doctor` can help diagnose and fix common problems. + +## Example + +Run the doctor command to set up or troubleshoot your CLI: + +```bash +runpodctl doctor +``` + +The command will interactively guide you through the setup process and report any issues it finds. + +## Related commands + +- [`runpodctl config`](/runpodctl/reference/runpodctl-config) +- [`runpodctl ssh add-key`](/runpodctl/reference/runpodctl-ssh) diff --git a/runpodctl/reference/runpodctl-get-cloud.mdx b/runpodctl/reference/runpodctl-get-cloud.mdx deleted file mode 100644 index 3d82af50..00000000 --- a/runpodctl/reference/runpodctl-get-cloud.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: "get cloud" -sidebarTitle: "get cloud" ---- - -List all GPUs currently available in the Runpod cloud, with options for filtering by GPU count, memory/disk size, and cloud type. - - -```sh Command -runpodctl get cloud [flags] -``` - - -## Example - -List all Secure Cloud GPUs with at least 4 instances available: - -```sh -runpodctl get cloud 4 --secure -``` - -## Arguments - - -The minimum number of GPUs that must be available for each option listed. - - -## Flags - - -Filter for GPUs with a minimum disk size (in gigabytes). - - - -Filter for GPUs with a minimum system memory size (in gigabytes). - - - -Filter for GPUs with a minimum number of vCPUs. - - - -List only GPUs from the [Secure Cloud](https://docs.runpod.io/pods/choose-a-pod#secure-cloud-vs-community-cloud). - - - -List only GPUs from the [Community Cloud](https://docs.runpod.io/pods/choose-a-pod#secure-cloud-vs-community-cloud). - diff --git a/runpodctl/reference/runpodctl-get-pod.mdx b/runpodctl/reference/runpodctl-get-pod.mdx deleted file mode 100644 index ea6754c5..00000000 --- a/runpodctl/reference/runpodctl-get-pod.mdx +++ /dev/null @@ -1,32 +0,0 @@ ---- -title: "get pod" -sidebarTitle: "get pod" ---- - -List all your Pods or retrieve details about a specific Pod by its ID. - - -```sh Command -runpodctl get pod [flags] -``` - - -## Example - -List all your Pods with complete field information: - -```sh -runpodctl get pod --allfields -``` - -## Arguments - - -The ID of a specific Pod to retrieve. If no ID is provided, all Pods will be listed. - - -## Flags - - -Include all available fields in the output, providing complete Pod information. - diff --git a/runpodctl/reference/runpodctl-gpu.mdx b/runpodctl/reference/runpodctl-gpu.mdx new file mode 100644 index 00000000..584fcae9 --- /dev/null +++ b/runpodctl/reference/runpodctl-gpu.mdx @@ -0,0 +1,73 @@ +--- +title: "gpu" +sidebarTitle: "gpu" +--- + +List available GPU types and their specifications. + + +```bash Command +runpodctl gpu [flags] +``` + + +## Subcommands + +### List available GPUs + +List GPUs that are currently available: + +```bash +runpodctl gpu list +``` + +Include unavailable GPUs in the list: + +```bash +runpodctl gpu list --include-unavailable +``` + +#### List flags + + +Include GPUs that are currently unavailable. 
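Because `gpu list` prints JSON (see the example output that follows), it pipes cleanly into standard tools. A sketch using `jq`, assuming the output arrives as the raw JSON array shown below:

```bash
# Print the IDs of available GPUs with at least 48 GB of memory
runpodctl gpu list | jq -r '.[] | select(.available and .memoryInGb >= 48) | .gpuId'
```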
## Example output

+```json
+[
+  {
+    "available": true,
+    "communityCloud": true,
+    "displayName": "RTX 4090",
+    "gpuId": "NVIDIA GeForce RTX 4090",
+    "memoryInGb": 24,
+    "secureCloud": true,
+    "stockStatus": "High"
+  },
+  {
+    "available": true,
+    "communityCloud": true,
+    "displayName": "A100 PCIe",
+    "gpuId": "NVIDIA A100 80GB PCIe",
+    "memoryInGb": 80,
+    "secureCloud": true,
+    "stockStatus": "High"
+  }
+]
+```
+
+## Using GPU IDs
+
+When creating Pods or Serverless endpoints, use the GPU ID from the list with the `--gpu-id` flag:
+
+```bash
+runpodctl pod create --template-id runpod-torch-v21 --gpu-id "NVIDIA GeForce RTX 4090"
+```
+
+## Related commands
+
+- [`runpodctl pod create`](/runpodctl/reference/runpodctl-pod)
+- [`runpodctl serverless create`](/runpodctl/reference/runpodctl-serverless)
+- [`runpodctl datacenter list`](/runpodctl/reference/runpodctl-datacenter)

diff --git a/runpodctl/reference/runpodctl-hub.mdx b/runpodctl/reference/runpodctl-hub.mdx
new file mode 100644
index 00000000..2def79e6
--- /dev/null
+++ b/runpodctl/reference/runpodctl-hub.mdx
@@ -0,0 +1,142 @@
+---
+title: "hub"
+sidebarTitle: "hub"
+---
+
+Browse and search the Runpod Hub marketplace to discover deployable repos. You can list popular repos, search by name, and get details for specific repos. Use Hub repo IDs with [`runpodctl serverless create --hub-id`](/runpodctl/reference/runpodctl-serverless) to deploy endpoints directly from the Hub.
+
+```bash
+runpodctl hub [flags]
+```
+
+## Subcommands
+
+### List repos
+
+List repos from the Hub marketplace. By default, shows the top 10 repos ordered by stars.
+
+```bash
+# List top repos by stars
+runpodctl hub list
+
+# List only Serverless repos
+runpodctl hub list --type SERVERLESS
+
+# List only Pod repos
+runpodctl hub list --type POD
+
+# Filter by category
+runpodctl hub list --category ai --limit 20
+
+# Order by deployment count
+runpodctl hub list --order-by deploys
+
+# Filter by repo owner
+runpodctl hub list --owner runpod
+```
+
+#### List flags
+
+Filter by deployment type (`POD` or `SERVERLESS`).
+Filter by category.
+Filter by repo owner.
+Sort results by field: `createdAt`, `deploys`, `releasedAt`, `stars`, `updatedAt`, or `views`.
+Sort direction: `asc` or `desc`.
+Maximum number of results to return.
+Number of results to skip for pagination.
+
+### Search repos
+
+Search for repos in the Hub by name:
+
+```bash
+# Search for vLLM repos
+runpodctl hub search vllm
+
+# Search Serverless repos only
+runpodctl hub search whisper --type SERVERLESS
+
+# Limit search results
+runpodctl hub search stable-diffusion --limit 5
+```
+
+#### Search flags
+
+Filter by deployment type (`POD` or `SERVERLESS`).
+Filter by category.
+Filter by repo owner.
+Sort results by field: `createdAt`, `deploys`, `releasedAt`, `stars`, `updatedAt`, or `views`.
+Sort direction: `asc` or `desc`.
+Maximum number of results to return.
+Number of results to skip for pagination.
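`--limit` and `--offset` combine for paging. For example, a sketch that walks the first three pages of Serverless results (the `--offset` spelling is assumed from the pagination flag described above):

```bash
# Page through Serverless repos matching "vllm", 10 at a time
for offset in 0 10 20; do
  runpodctl hub search vllm --type SERVERLESS --limit 10 --offset "$offset"
done
```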
### Get repo details

+Get detailed information about a specific Hub repo by its ID or owner/name:
+
+```bash
+# Get by listing ID
+runpodctl hub get cm8h09d9n000008jvh2rqdsmb
+
+# Get by owner/name
+runpodctl hub get runpod-workers/worker-vllm
+```
+
+## Deploy from the Hub
+
+After finding a repo you want to deploy, use its listing ID with `serverless create`:
+
+```bash
+# Find a repo
+runpodctl hub search vllm
+
+# Deploy it
+runpodctl serverless create --hub-id cm8h09d9n000008jvh2rqdsmb --name "my-vllm"
+```
+
+GPU IDs and container disk size are automatically pulled from the Hub release config. You can override the GPU type with `--gpu-id`.
+
+## Related commands
+
+- [`runpodctl serverless create`](/runpodctl/reference/runpodctl-serverless)
+- [`runpodctl template search`](/runpodctl/reference/runpodctl-template)

diff --git a/runpodctl/reference/runpodctl-network-volume.mdx b/runpodctl/reference/runpodctl-network-volume.mdx
new file mode 100644
index 00000000..06f9653a
--- /dev/null
+++ b/runpodctl/reference/runpodctl-network-volume.mdx
@@ -0,0 +1,97 @@
+---
+title: "network-volume"
+sidebarTitle: "network-volume"
+---
+
+Manage network volumes for persistent shared storage across Pods and Serverless endpoints.
+
+```bash Command
+runpodctl network-volume [flags]
+```
+
+## Alias
+
+You can use `nv` as a shorthand for `network-volume`:
+
+```bash
+runpodctl nv list
+```
+
+## Subcommands
+
+### List network volumes
+
+List all your network volumes:
+
+```bash
+runpodctl network-volume list
+```
+
+### Get network volume details
+
+Get detailed information about a specific network volume:
+
+```bash
+runpodctl network-volume get [VOLUME_ID]
+```
+
+### Create a network volume
+
+Create a new network volume:
+
+```bash
+runpodctl network-volume create --name "my-volume" --size 100 --data-center-id "US-GA-1"
+```
+
+#### Create flags
+
+Volume name.
+Volume size in GB (1-4000).
+Datacenter ID where the volume will be created. Use [`runpodctl datacenter list`](/runpodctl/reference/runpodctl-datacenter) to see available datacenters.
+
+### Update a network volume
+
+Update network volume configuration:
+
+```bash
+runpodctl network-volume update [VOLUME_ID] --name "new-name"
+```
+
+#### Update flags
+
+New volume name.
+New volume size in GB. Must be larger than the current size.
+
+### Delete a network volume
+
+Delete a network volume:
+
+```bash
+runpodctl network-volume delete [VOLUME_ID]
+```
+
+Deleting a network volume permanently removes all data stored on it. Make sure to back up any important data before deleting.
+
+## Related commands
+
+- [`runpodctl pod create`](/runpodctl/reference/runpodctl-pod)
+- [`runpodctl datacenter list`](/runpodctl/reference/runpodctl-datacenter)

diff --git a/runpodctl/reference/runpodctl-pod.mdx b/runpodctl/reference/runpodctl-pod.mdx
new file mode 100644
index 00000000..ca9428ca
--- /dev/null
+++ b/runpodctl/reference/runpodctl-pod.mdx
@@ -0,0 +1,266 @@
+---
+title: "pod"
+sidebarTitle: "pod"
+---
+
+Manage Pods, including creating, listing, starting, stopping, and deleting Pods.
+
+```bash
+runpodctl pod [flags]
+```
+
+## Subcommands
+
+### List Pods
+
+List your Pods.
By default, this command shows only running Pods (similar to `docker ps`):

+```bash
+runpodctl pod list
+```
+
+List all Pods including exited ones:
+
+```bash
+runpodctl pod list --all
+```
+
+Filter by status:
+
+```bash
+runpodctl pod list --status exited
+```
+
+Filter by creation time:
+
+```bash
+# Pods created in the last 24 hours
+runpodctl pod list --since 24h
+
+# Pods created in the last 7 days
+runpodctl pod list --since 7d
+
+# Pods created after a specific date
+runpodctl pod list --created-after 2025-01-15
+```
+
+#### List flags
+
+Show all Pods including exited ones. By default, only running Pods are shown.
+Filter by Pod status (e.g., `RUNNING`, `EXITED`). Cannot be used with `--all`.
+Filter Pods created within the specified duration (e.g., `1h`, `24h`, `7d`). Cannot be used with `--created-after`.
+Filter Pods created after the specified date in `YYYY-MM-DD` format. Cannot be used with `--since`.
+Filter by compute type (`GPU` or `CPU`).
+Filter by Pod name.
+
+### Get Pod details
+
+Get detailed information about a specific Pod, including SSH connection info:
+
+```bash
+runpodctl pod get [POD_ID]
+```
+
+### Create a Pod
+
+Create a new Pod from a template:
+
+```bash
+runpodctl pod create --template-id runpod-torch-v21 --gpu-id "NVIDIA GeForce RTX 4090"
+```
+
+Create a Pod with a custom Docker image:
+
+```bash
+runpodctl pod create --image "runpod/pytorch:1.0.3-cu1281-torch291-ubuntu2404" --gpu-id "NVIDIA GeForce RTX 4090"
+```
+
+Create a CPU-only Pod:
+
+```bash
+runpodctl pod create --compute-type cpu --image ubuntu:22.04
+```
+
+#### Create flags
+
+Template ID to use for Pod configuration. Use [`runpodctl template search`](/runpodctl/reference/runpodctl-template) to find templates.
+Docker image to use (e.g., `runpod/pytorch:latest`). Required if no template specified.
+Custom name for the Pod.
+GPU type (e.g., `NVIDIA GeForce RTX 4090`, `NVIDIA A100 80GB PCIe`). Use [`runpodctl gpu list`](/runpodctl/reference/runpodctl-gpu) to see available GPUs.
+Number of GPUs to allocate.
+Compute type (`GPU` or `CPU`).
+Container disk size in GB.
+Persistent volume size in GB.
+Mount path for the persistent volume.
+Comma-separated list of ports to expose (e.g., `8888/http,22/tcp`).
+Environment variables as a JSON object (e.g., `'{"KEY":"value"}'`).
+Cloud tier (`SECURE` or `COMMUNITY`).
+Comma-separated list of preferred datacenter IDs. Use [`runpodctl datacenter list`](/runpodctl/reference/runpodctl-datacenter) to see available datacenters.
+Enable global networking (Secure Cloud only).
+Require public IP (Community Cloud only).
+Enable SSH on the Pod.
+Network volume ID to attach. Use [`runpodctl network-volume list`](/runpodctl/reference/runpodctl-network-volume) to see available network volumes.
+
+### Start a Pod
+
+Start a stopped Pod:
+
+```bash
+runpodctl pod start [POD_ID]
+```
+
+### Stop a Pod
+
+Stop a running Pod:
+
+```bash
+runpodctl pod stop [POD_ID]
+```
+
+### Restart a Pod
+
+Restart a Pod:
+
+```bash
+runpodctl pod restart [POD_ID]
+```
+
+### Reset a Pod
+
+Reset a Pod to its initial state:
+
+```bash
+runpodctl pod reset [POD_ID]
+```
+
+### Update a Pod
+
+Update Pod configuration:
+
+```bash
+runpodctl pod update [POD_ID] --name "new-name"
+```
+
+#### Update flags
+
+New name for the Pod.
+New Docker image name.
+New container disk size in GB.
+New volume size in GB.
+New volume mount path.
+New comma-separated list of ports.
+New environment variables as a JSON object.
+
+### Delete a Pod
+
+Delete a Pod:
+
+```bash
+runpodctl pod delete [POD_ID]
+```
+
+## Pod URLs
+
+Access exposed ports on your Pod using the following URL pattern:
+
+```
+https://[POD_ID]-[PORT].proxy.runpod.net
+```
+
+For example, if your Pod ID is `abc123xyz` and you exposed port 8888:
+```
+https://abc123xyz-8888.proxy.runpod.net
+```
+
+## Related commands
+
+- [`runpodctl gpu list`](/runpodctl/reference/runpodctl-gpu)
+- [`runpodctl template`](/runpodctl/reference/runpodctl-template)
+- [`runpodctl ssh`](/runpodctl/reference/runpodctl-ssh)

diff --git a/runpodctl/reference/runpodctl-receive.mdx b/runpodctl/reference/runpodctl-receive.mdx
index 4823e901..3a00ba63 100644
--- a/runpodctl/reference/runpodctl-receive.mdx
+++ b/runpodctl/reference/runpodctl-receive.mdx
@@ -6,26 +6,25 @@ sidebarTitle: "receive"

Receive files or folders sent from another machine using a secure peer-to-peer connection established with a connection code.

-```sh Command
-runpodctl receive [flags]
+```bash Command
+runpodctl receive [CODE]
```

## Example

-Receive files using a connection code.
+Receive files using a connection code:

-```sh
+```bash
runpodctl receive rainbow-unicorn-42
```

## Arguments

-The connection code phrase that matches the code used by the sender with the `send` command. If not provided, you'll be prompted to enter it.
+The connection code phrase that matches the code used by the sender with the [`send`](/runpodctl/reference/runpodctl-send) command.

## Related commands

- [`runpodctl send`](/runpodctl/reference/runpodctl-send)
-- [`runpodctl`](/runpodctl/reference/runpodctl)

diff --git a/runpodctl/reference/runpodctl-registry.mdx b/runpodctl/reference/runpodctl-registry.mdx
new file mode 100644
index 00000000..44145537
--- /dev/null
+++ b/runpodctl/reference/runpodctl-registry.mdx
@@ -0,0 +1,73 @@
+---
+title: "registry"
+sidebarTitle: "registry"
+---
+
+Manage container registry authentications for private Docker images.
+
+```bash Command
+runpodctl registry [flags]
+```
+
+## Alias
+
+You can use `reg` as a shorthand for `registry`:
+
+```bash
+runpodctl reg list
+```
+
+## Subcommands
+
+### List registry authentications
+
+List all your container registry authentications:
+
+```bash
+runpodctl registry list
+```
+
+### Get registry authentication details
+
+Get details about a specific registry authentication:
+
+```bash
+runpodctl registry get [REGISTRY_AUTH_ID]
+```
+
+### Create a registry authentication
+
+Create credentials for a private container registry:
+
+```bash
+runpodctl registry create --name "docker-hub" --username "myuser" --password "mypassword"
+```
+
+#### Create flags
+
+Name for this registry authentication.
+Registry username.
+Registry password or access token.
+
+### Delete a registry authentication
+
+Delete a registry authentication:
+
+```bash
+runpodctl registry delete [REGISTRY_AUTH_ID]
+```
+
+## Related commands
+
+- [`runpodctl template create`](/runpodctl/reference/runpodctl-template)
+- [`runpodctl pod create`](/runpodctl/reference/runpodctl-pod)

diff --git a/runpodctl/reference/runpodctl-remove-pod.mdx b/runpodctl/reference/runpodctl-remove-pod.mdx
deleted file mode 100644
index ca214d29..00000000
--- a/runpodctl/reference/runpodctl-remove-pod.mdx
+++ /dev/null
@@ -1,31 +0,0 @@
----
-title: "remove pod"
-sidebarTitle: "remove pod"
----
-
-Permanently delete a Pod and all its associated data. This action cannot be undone.
-
-```sh Command
-runpodctl remove pod
-```
-
-## Example
-
-Terminate a Pod by its ID.
- -```sh -runpodctl remove pod abc123xyz456 -``` - -## Arguments - - -The ID of the Pod to terminate. You can find Pod IDs using the `runpodctl get pod` command. - - -## Related commands - -- [`runpodctl remove pods`](/runpodctl/reference/runpodctl-remove-pods) -- [`runpodctl get pod`](/runpodctl/reference/runpodctl-get-pod) diff --git a/runpodctl/reference/runpodctl-remove-pods.mdx b/runpodctl/reference/runpodctl-remove-pods.mdx deleted file mode 100644 index 8de04940..00000000 --- a/runpodctl/reference/runpodctl-remove-pods.mdx +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "remove pods" -sidebarTitle: "remove pods" ---- - -Terminate multiple Pods that share the same name. This is useful for cleaning up groups of Pods created with the `create pods` command. - - -```sh Command -runpodctl remove pods [flags] -``` - - -## Example - -Terminate all Pods named "training-worker": - -```sh -runpodctl remove pods training-worker -``` - -## Arguments - - -The name of the Pods to terminate. All Pods with this exact name will be removed. - - -## Flags - - -The number of Pods with the specified name to terminate. This limits the removal to a specific count rather than removing all matching Pods. - - -## Related commands - -- [`runpodctl remove pod`](/runpodctl/reference/runpodctl-remove-pod) -- [`runpodctl get pod`](/runpodctl/reference/runpodctl-get-pod) diff --git a/runpodctl/reference/runpodctl-send.mdx b/runpodctl/reference/runpodctl-send.mdx index 061f28b9..141f1714 100644 --- a/runpodctl/reference/runpodctl-send.mdx +++ b/runpodctl/reference/runpodctl-send.mdx @@ -6,7 +6,7 @@ sidebarTitle: "send" Transfer files or folders from your local machine to a Pod or another computer using a secure peer-to-peer connection. -```sh Command +```bash Command runpodctl send [flags] ``` @@ -15,7 +15,7 @@ runpodctl send [flags] Send a folder to a Pod using a connection code: -```sh +```bash runpodctl send ./my-dataset --code rainbow-unicorn-42 ``` @@ -28,10 +28,9 @@ The path to the file or folder you want to send. Can be a single file or an enti ## Flags -A custom code phrase used to establish the secure connection between sender and receiver. The receiver must use the same code with the `receive` command. +A custom code phrase used to establish the secure connection between sender and receiver. The receiver must use the same code with the [`receive`](/runpodctl/reference/runpodctl-receive) command. ## Related commands - [`runpodctl receive`](/runpodctl/reference/runpodctl-receive) -- [`runpodctl`](/runpodctl/reference/runpodctl) diff --git a/runpodctl/reference/runpodctl-serverless.mdx b/runpodctl/reference/runpodctl-serverless.mdx new file mode 100644 index 00000000..4dd22e0e --- /dev/null +++ b/runpodctl/reference/runpodctl-serverless.mdx @@ -0,0 +1,182 @@ +--- +title: "serverless" +sidebarTitle: "serverless" +--- + +import { VolumeDiskTooltip } from "/snippets/tooltips.jsx"; + +Manage Serverless endpoints, including creating, listing, updating, and deleting endpoints. + +```bash +runpodctl serverless [flags] +``` + +## Alias + +You can use `sls` as a shorthand for `serverless`: + +```bash +runpodctl sls list +``` + +## Subcommands + +### List endpoints + +List all your Serverless endpoints: + +```bash +runpodctl serverless list +``` + +#### List flags + + +Include template information in the output. + + + +Include workers information in the output. 
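The two include flags can be combined when you want template and worker details in a single call. A sketch using the `sls` alias from above; the spellings `--include-template` and `--include-workers` are assumed from the flag descriptions, so verify them with `runpodctl serverless list --help`:

```bash
# Flag spellings assumed from the descriptions above; verify with --help
runpodctl sls list --include-template --include-workers
```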
### Get endpoint details

+Get detailed information about a specific endpoint:
+
+```bash
+runpodctl serverless get [ENDPOINT_ID]
+```
+
+#### Get flags
+
+Include template information in the output.
+Include workers information in the output.
+
+### Create an endpoint
+
+Create a new Serverless endpoint from a template or from a Hub repo:
+
+```bash
+# Create from a template
+runpodctl serverless create --name "my-endpoint" --template-id "tpl_abc123"
+
+# Create from a Hub repo
+runpodctl hub search vllm # Find the hub ID
+runpodctl serverless create --hub-id cm8h09d9n000008jvh2rqdsmb --name "my-vllm"
+```
+
+When using `--hub-id`, GPU IDs and container disk size are automatically pulled from the Hub release config. You can override the GPU type with `--gpu-id`.
+
+**Serverless templates vs Pod templates**: Serverless endpoints require a Serverless-specific template. Pod templates (like `runpod-torch-v21`) cannot be used because they include volume disk configuration, which Serverless does not support. When creating a template with [`runpodctl template create`](/runpodctl/reference/runpodctl-template), use the `--serverless` flag to create a Serverless template.
+
+Each Serverless template can only be bound to one endpoint at a time. To create multiple endpoints with the same configuration, create separate templates for each.
+
+#### Create flags
+
+Name for the endpoint.
+Template ID to use (required if `--hub-id` is not specified). Use [`runpodctl template search`](/runpodctl/reference/runpodctl-template) to find templates.
+Hub listing ID to deploy from (alternative to `--template-id`). Use [`runpodctl hub search`](/runpodctl/reference/runpodctl-hub) to find repos.
+GPU type for workers. Use [`runpodctl gpu list`](/runpodctl/reference/runpodctl-gpu) to see available GPUs.
+Number of GPUs per worker.
+Compute type (`GPU` or `CPU`).
+Minimum number of workers.
+Maximum number of workers.
+Comma-separated list of preferred datacenter IDs. Use [`runpodctl datacenter list`](/runpodctl/reference/runpodctl-datacenter) to see available datacenters.
+Network volume ID to attach. Use [`runpodctl network-volume list`](/runpodctl/reference/runpodctl-network-volume) to see available network volumes.
+
+### Update an endpoint
+
+Update endpoint configuration:
+
+```bash
+runpodctl serverless update [ENDPOINT_ID] --workers-max 5
+```
+
+#### Update flags
+
+New name for the endpoint.
+New minimum number of workers.
+New maximum number of workers.
+New idle timeout in seconds.
+Scaler type (`QUEUE_DELAY` or `REQUEST_COUNT`).
+Scaler value.
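As a worked example of the update flags above, here is a hedged sketch of retuning an endpoint's autoscaling. Only `--workers-max` appears verbatim in the example above; the other spellings (`--workers-min`, `--idle-timeout`, `--scaler-type`, `--scaler-value`) are assumed from the flag descriptions, so check `runpodctl serverless update --help` before relying on them:

```bash
# Flag spellings other than --workers-max are assumed; verify with --help
runpodctl serverless update [ENDPOINT_ID] \
  --workers-min 0 \
  --workers-max 5 \
  --idle-timeout 5 \
  --scaler-type QUEUE_DELAY \
  --scaler-value 4
```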
### Delete an endpoint

+Delete an endpoint:
+
+```bash
+runpodctl serverless delete [ENDPOINT_ID]
+```
+
+## Serverless URLs
+
+Access your Serverless endpoint using these URL patterns:
+
+| Operation | URL |
+|-----------|-----|
+| Async request | `https://api.runpod.ai/v2/[ENDPOINT_ID]/run` |
+| Sync request | `https://api.runpod.ai/v2/[ENDPOINT_ID]/runsync` |
+| Health check | `https://api.runpod.ai/v2/[ENDPOINT_ID]/health` |
+| Job status | `https://api.runpod.ai/v2/[ENDPOINT_ID]/status/[JOB_ID]` |
+
+## Related commands
+
+- [`runpodctl hub`](/runpodctl/reference/runpodctl-hub)
+- [`runpodctl template`](/runpodctl/reference/runpodctl-template)
+- [`runpodctl gpu list`](/runpodctl/reference/runpodctl-gpu)

diff --git a/runpodctl/reference/runpodctl-ssh-add-key.mdx b/runpodctl/reference/runpodctl-ssh-add-key.mdx
deleted file mode 100644
index b5c42997..00000000
--- a/runpodctl/reference/runpodctl-ssh-add-key.mdx
+++ /dev/null
@@ -1,34 +0,0 @@
----
-title: "ssh add-key"
-sidebarTitle: "ssh add-key"
----
-
-Add an SSH public key to your Runpod account for secure Pod access. If no key is provided, a new key pair will be generated automatically.
-
-```sh Command
-runpodctl ssh add-key [flags]
-```
-
-## Example
-
-Add an SSH key from a file:
-
-```sh
-runpodctl ssh add-key --key-file ~/.ssh/id_rsa.pub
-```
-
-## Flags
-
-The SSH public key content to add to your account. This should be the full public key string.
-The path to a file containing the SSH public key to add. This is typically a `.pub` file from your SSH key pair.
-
-## Related commands
-
-- [`runpodctl ssh list-keys`](/runpodctl/reference/runpodctl-ssh-list-keys)

diff --git a/runpodctl/reference/runpodctl-ssh-list-keys.mdx b/runpodctl/reference/runpodctl-ssh-list-keys.mdx
deleted file mode 100644
index c5cbd58b..00000000
--- a/runpodctl/reference/runpodctl-ssh-list-keys.mdx
+++ /dev/null
@@ -1,24 +0,0 @@
----
-title: "ssh list-keys"
-sidebarTitle: "ssh list-keys"
----
-
-Display all SSH keys associated with your Runpod account.
-
-```sh Command
-runpodctl ssh list-keys [flags]
-```
-
-## Example
-
-List all your SSH keys:
-
-```sh
-runpodctl ssh list-keys
-```
-
-## Related commands
-
-- [`runpodctl ssh add-key`](/runpodctl/reference/runpodctl-ssh-add-key)

diff --git a/runpodctl/reference/runpodctl-ssh.mdx b/runpodctl/reference/runpodctl-ssh.mdx
new file mode 100644
index 00000000..c2800118
--- /dev/null
+++ b/runpodctl/reference/runpodctl-ssh.mdx
@@ -0,0 +1,74 @@
+---
+title: "ssh"
+sidebarTitle: "ssh"
+---
+
+Manage SSH keys and get SSH connection information for Pods.
+
+```bash Command
+runpodctl ssh [flags]
+```
+
+## Subcommands
+
+### Get SSH connection info
+
+Get SSH connection details for a Pod. This returns the SSH command and key information, but does not initiate an interactive session:
+
+```bash
+runpodctl ssh info [POD_ID]
+```
+
+#### Info flags
+
+Include Pod ID and name in output. Shorthand: `-v`.
+
+The `ssh info` command returns connection details that you can use to connect via SSH manually. It does not start an interactive SSH session.
+
+To connect to your Pod, use the SSH command provided in the output:
+```bash
+ssh user@host -p [PORT] -i [KEY_PATH]
+```
+
+### List SSH keys
+
+List all SSH keys associated with your account:
+
+```bash
+runpodctl ssh list-keys
+```
+
+### Add an SSH key
+
+Add a new SSH key to your account:
+
+```bash
+# Add a key from a file
+runpodctl ssh add-key --key-file ~/.ssh/id_ed25519.pub
+
+# Add a key directly
+runpodctl ssh add-key --key "ssh-ed25519 AAAA..."
+```
+
+#### Add-key flags
+
+The public key string to add.
+ + + +Path to a file containing the public key. + + +## Related commands + +- [`runpodctl pod get`](/runpodctl/reference/runpodctl-pod) +- [`runpodctl doctor`](/runpodctl/reference/runpodctl-doctor) diff --git a/runpodctl/reference/runpodctl-start-pod.mdx b/runpodctl/reference/runpodctl-start-pod.mdx deleted file mode 100644 index 6b09d31d..00000000 --- a/runpodctl/reference/runpodctl-start-pod.mdx +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: "start pod" -sidebarTitle: "start pod" ---- - -Start a stopped Pod, resuming compute and billing. Use this to restart Pods that were previously stopped. - - -```sh Command -runpodctl start pod [flags] -``` - - -## Example - -Start a stopped Pod with a custom bid price for spot instances: - -```sh -runpodctl start pod abc123xyz456 --bid 0.50 -``` - -## Arguments - - -The ID of the Pod to start. You can find Pod IDs using the `runpodctl get pod` command. - - -## Flags - - -The bid price per GPU in dollars per hour for spot instance pricing. This only applies to Community Cloud Pods. - - -## Related commands - -- [`runpodctl stop pod`](/runpodctl/reference/runpodctl-stop-pod) -- [`runpodctl get pod`](/runpodctl/reference/runpodctl-get-pod) diff --git a/runpodctl/reference/runpodctl-stop-pod.mdx b/runpodctl/reference/runpodctl-stop-pod.mdx deleted file mode 100644 index 4598959a..00000000 --- a/runpodctl/reference/runpodctl-stop-pod.mdx +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: "stop pod" -sidebarTitle: "stop pod" ---- - -Stop a running Pod to pause compute operations. The Pod's persistent storage will be retained, but you'll continue to be charged for storage until the Pod is removed. - - -```sh Command -runpodctl stop pod [flags] -``` - - -## Example - -Stop a running Pod. - -```sh -runpodctl stop pod abc123xyz456 -``` - -## Arguments - - -The ID of the Pod to stop. You can find Pod IDs using the `runpodctl get pod` command. - - -## Related commands - -- [`runpodctl start pod`](/runpodctl/reference/runpodctl-start-pod) -- [`runpodctl get pod`](/runpodctl/reference/runpodctl-get-pod) diff --git a/runpodctl/reference/runpodctl-template.mdx b/runpodctl/reference/runpodctl-template.mdx new file mode 100644 index 00000000..4775130f --- /dev/null +++ b/runpodctl/reference/runpodctl-template.mdx @@ -0,0 +1,208 @@ +--- +title: "template" +sidebarTitle: "template" +--- + +import { VolumeDiskTooltip } from "/snippets/tooltips.jsx"; + +List, search, and manage templates for Pods and Serverless endpoints. + +```bash +runpodctl template [flags] +``` + +## Alias + +You can use `tpl` as a shorthand for `template`: + +```bash +runpodctl tpl list +``` + +## Subcommands + +### List templates + +List available templates: + +```bash +# List official and community templates (first 10) +runpodctl template list + +# List only official templates +runpodctl template list --type official + +# List community templates +runpodctl template list --type community + +# List your own templates +runpodctl template list --type user + +# List all templates including user templates +runpodctl template list --all + +# Show more results +runpodctl template list --limit 50 +``` + +#### List flags + + +Filter by template type (`official`, `community`, `user`). + + + +Maximum number of results to return. + + + +Number of results to skip for pagination. + + + +Include all templates including user templates. 
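+
+For example, assuming the pagination flag described above is spelled `--offset`, you can page through community templates in batches:
+
+```bash
+# First 20 community templates, then the next 20
+runpodctl template list --type community --limit 20
+runpodctl template list --type community --limit 20 --offset 20
+```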
+
+
+### Search templates
+
+Search for templates by name:
+
+```bash
+# Search for PyTorch templates
+runpodctl template search pytorch
+
+# Search with limit
+runpodctl template search comfyui --limit 5
+
+# Search only official templates
+runpodctl template search vllm --type official
+```
+
+#### Search flags
+
+
+Filter by template type (`official`, `community`, `user`).
+
+
+
+Maximum number of results to return.
+
+
+
+Number of results to skip for pagination.
+
+
+### Get template details
+
+Get detailed information about a specific template, including README, environment variables, and exposed ports:
+
+```bash
+runpodctl template get YOUR_TEMPLATE_ID
+```
+
+### Create a template
+
+Create a new template:
+
+```bash
+# Create a Pod template
+runpodctl template create --name "my-template" --image "runpod/pytorch:latest"
+
+# Create a Serverless template
+runpodctl template create --name "my-serverless-template" --image "my-image:latest" --serverless
+```
+
+
+
+**Pod templates vs Serverless templates**: Templates are either for Pods or for Serverless endpoints, not both. Pod templates support volume disk storage and are used with `runpodctl pod create`. Serverless templates do not support volume disks and are used with `runpodctl serverless create`. Use the `--serverless` flag when creating templates for Serverless endpoints.
+
+
+
+#### Create flags
+
+
+Template name.
+
+
+
+Docker image (e.g., `runpod/pytorch:latest`).
+
+
+
+Container disk size in GB.
+
+
+
+Persistent volume size in GB.
+
+
+
+Volume mount path.
+
+
+
+Comma-separated list of ports to expose (e.g., `8888/http,22/tcp`).
+
+
+
+Environment variables as a JSON object (e.g., `'{"KEY":"value"}'`).
+
+
+
+Comma-separated Docker start commands.
+
+
+
+Comma-separated Docker entrypoint commands.
+
+
+
+Create as a Serverless template. Serverless templates do not support volume disks (`--volume-in-gb` is not supported). Each Serverless template can only be bound to one endpoint at a time.
+
+
+
+Template README content.
+
+
+### Update a template
+
+Update an existing template:
+
+```bash
+runpodctl template update YOUR_TEMPLATE_ID --name "new-name"
+```
+
+#### Update flags
+
+
+New template name.
+
+
+
+New Docker image name.
+
+
+
+New comma-separated list of ports.
+
+
+
+New environment variables as a JSON object.
+
+
+
+New README content.
+
+
+### Delete a template
+
+Delete a template:
+
+```bash
+runpodctl template delete YOUR_TEMPLATE_ID
+```
+
+## Related commands
+
+- [`runpodctl pod create`](/runpodctl/reference/runpodctl-pod)
+- [`runpodctl serverless create`](/runpodctl/reference/runpodctl-serverless)
diff --git a/runpodctl/reference/runpodctl-update.mdx b/runpodctl/reference/runpodctl-update.mdx
index be01ebbb..9517bca0 100644
--- a/runpodctl/reference/runpodctl-update.mdx
+++ b/runpodctl/reference/runpodctl-update.mdx
@@ -6,11 +6,15 @@ sidebarTitle: "update"
 
 Update `runpodctl` to the latest version to access new features and bug fixes.
 
-```sh Command
+```bash Command
 runpodctl update
 ```
 
+## Description
+
+The `update` command downloads and installs the latest version of `runpodctl`. After updating, verify the installation with [`runpodctl version`](/runpodctl/reference/runpodctl-version).
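+
+For example, a typical update-and-verify sequence looks like this:
+
+```bash
+runpodctl update
+runpodctl version
+```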
+
 ## Related commands
 
 - [`runpodctl version`](/runpodctl/reference/runpodctl-version)
diff --git a/runpodctl/reference/runpodctl-user.mdx b/runpodctl/reference/runpodctl-user.mdx
new file mode 100644
index 00000000..cba022ae
--- /dev/null
+++ b/runpodctl/reference/runpodctl-user.mdx
@@ -0,0 +1,50 @@
+---
+title: "user"
+sidebarTitle: "user"
+---
+
+View your account information and current balance.
+
+
+```bash Command
+runpodctl user
+```
+
+
+## Alias
+
+You can use `me` as a shorthand for `user`:
+
+```bash
+runpodctl me
+```
+
+## Description
+
+The `user` command displays information about your Runpod account, including:
+
+- Account email
+- Current credit balance
+- Current spend per hour
+- Spend limit
+- Notification settings
+
+## Example output
+
+```json
+{
+  "clientBalance": 435.85,
+  "currentSpendPerHr": 0.001,
+  "email": "user@example.com",
+  "id": "user_abc123",
+  "notifyLowBalance": true,
+  "notifyPodsGeneral": true,
+  "notifyPodsStale": true,
+  "spendLimit": 180
+}
+```
+
+## Related commands
+
+- [`runpodctl billing`](/runpodctl/reference/runpodctl-billing)
+- [`runpodctl config`](/runpodctl/reference/runpodctl-config)
diff --git a/runpodctl/reference/runpodctl-version.mdx b/runpodctl/reference/runpodctl-version.mdx
index 4d7cf40e..85ca4b10 100644
--- a/runpodctl/reference/runpodctl-version.mdx
+++ b/runpodctl/reference/runpodctl-version.mdx
@@ -6,11 +6,17 @@ sidebarTitle: "version"
 
 Display the current version of `runpodctl` installed on your system.
 
-```sh Command
+```bash Command
 runpodctl version
 ```
 
+## Example output
+
+```
+runpodctl 2.1.6-400ac40
+```
+
 ## Related commands
 
 - [`runpodctl update`](/runpodctl/reference/runpodctl-update)
diff --git a/runpodctl/transfer-files.mdx b/runpodctl/transfer-files.mdx
deleted file mode 100644
index e69de29b..00000000
diff --git a/serverless/build-worker.mdx b/serverless/build-worker.mdx
new file mode 100644
index 00000000..68cba3c9
--- /dev/null
+++ b/serverless/build-worker.mdx
@@ -0,0 +1,237 @@
+---
+title: "Build a custom worker"
+sidebarTitle: "Build a custom worker"
+description: "Write a handler function, build a worker image, and deploy your own Serverless endpoint."
+---
+
+This tutorial walks you through building a custom Serverless worker from scratch. You'll write a handler function, package it in a Docker container, and deploy it to Runpod.
+
+
+For an even faster start, clone or download the [worker-basic](https://github.com/runpod-workers/worker-basic) repository for a pre-configured template. After cloning, skip to [Step 7](#step-7-build-and-push-your-worker-image) to deploy and test.
+
+
+## Requirements
+
+* You've [created a Runpod account](/get-started/manage-accounts).
+* You've installed [Python 3.x](https://www.python.org/downloads/) and [Docker](https://docs.docker.com/get-started/get-docker/) on your local machine and configured them for your command line.
+
+## Step 1: Create project files
+
+Create a new directory with empty files for your project:
+
+```bash
+mkdir serverless-quickstart && cd serverless-quickstart
+touch handler.py Dockerfile requirements.txt test_input.json
+```
+
+## Step 2: Install the Serverless SDK
+
+Create a virtual environment and install the Serverless SDK:
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install runpod
+```
+
+## Step 3: Create a handler function
+
+Add the following code to `handler.py`:
+
+```python handler.py
+import runpod
+import time
+
+def handler(event):
+    # This function processes incoming requests to your Serverless endpoint.
+    #
+    # Args:
+    #     event (dict): Contains the input data and request metadata
+    #
+    # Returns:
+    #     Any: The result to be returned to the client
+
+    # Extract input data
+    print("Worker Start")
+    job_input = event['input']
+
+    prompt = job_input.get('prompt')
+    seconds = job_input.get('seconds', 0)
+
+    print(f"Received prompt: {prompt}")
+    print(f"Sleeping for {seconds} seconds...")
+
+    # You can replace this sleep call with your own Python code
+    time.sleep(seconds)
+
+    return prompt
+
+# Start the Serverless function when the script is run
+if __name__ == '__main__':
+    runpod.serverless.start({'handler': handler})
+```
+
+This is a bare-bones handler that processes a JSON object and outputs a `prompt` string contained in the `input` object.
+
+
+
+You can replace the `time.sleep(seconds)` call with your own Python code for generating images, text, or running any AI/ML workload.
+
+
+
+## Step 4: Create a test input file
+
+Add the following code to `test_input.json` to properly test your handler locally:
+
+```json test_input.json
+{
+  "input": {
+    "prompt": "Hey there!"
+  }
+}
+```
+
+## Step 5: Test your handler function locally
+
+Run your handler function using your local terminal:
+
+```sh
+python handler.py
+```
+
+You should see output similar to this:
+
+```text
+--- Starting Serverless Worker | Version 1.7.9 ---
+INFO | Using test_input.json as job input.
+DEBUG | Retrieved local job: {'input': {'prompt': 'Hey there!'}, 'id': 'local_test'}
+INFO | local_test | Started.
+Worker Start
+Received prompt: Hey there!
+Sleeping for 0 seconds...
+DEBUG | local_test | Handler output: Hey there!
+DEBUG | local_test | run_job return: {'output': 'Hey there!'}
+INFO | Job local_test completed successfully.
+INFO | Job result: {'output': 'Hey there!'}
+INFO | Local testing complete, exiting.
+```
+
+## Step 6: Create a Dockerfile
+
+Add the following content to `Dockerfile`:
+
+
+New to Dockerfiles? Learn the fundamentals with our [introduction to containers](/tutorials/introduction/containers) tutorial series.
+
+
+```dockerfile Dockerfile
+FROM python:3.10-slim
+
+WORKDIR /
+
+# Install dependencies
+RUN pip install --no-cache-dir runpod
+
+# Copy your handler file
+COPY handler.py /
+
+# Start the container
+CMD ["python3", "-u", "handler.py"]
+```
+
+## Step 7: Build and push your worker image
+
+
+
+Instead of building and pushing your image via Docker Hub, you can also [deploy your worker from a GitHub repository](/serverless/workers/github-integration).
+
+
+
+Before you can deploy your worker on Runpod Serverless, you need to push it to Docker Hub:
+
+
+  Build your Docker image, specifying the platform for Runpod deployment, replacing `[YOUR_USERNAME]` with your Docker username:
+
+  ```sh
+  docker build --platform linux/amd64 --tag [YOUR_USERNAME]/serverless-test .
+  ```
+
+
+
+  ```sh
+  docker push [YOUR_USERNAME]/serverless-test:latest
+  ```
+
+
+
+## Step 8: Deploy your worker using the Runpod console
+
+To deploy your worker to a Serverless endpoint:
+
+1. Go to the [Serverless section](https://www.console.runpod.io/serverless) of the Runpod console.
+2. Click **New Endpoint**.
+3. Click **Import from Docker Registry**.
+4. In the **Container Image** field, enter your Docker image URL: `docker.io/yourusername/serverless-test:latest`.
+5. Click **Next** to proceed to endpoint configuration.
+6. Configure your endpoint settings:
+   * (Optional) Enter a custom name for your endpoint, or use the randomly generated name.
+   * Make sure the **Endpoint Type** is set to **Queue**.
+ * Under **GPU Configuration**, check the box for **16 GB** GPUs. + * Leave the rest of the settings at their defaults. +7. Click **Deploy Endpoint**. + +The system will redirect you to a dedicated detail page for your new endpoint. + +## Step 9: Test your endpoint + +To test your endpoint, click the **Requests** tab in the endpoint detail page: + + + + + +On the left you should see the default test request: + +```json +{ + "input": { + "prompt": "Hello World" + } +} +``` + +Leave the default input as is and click **Run**. The system will take a few minutes to initialize your workers. + +When the workers finish processing your request, you should see output on the right side of the page similar to this: + +```json +{ + "delayTime": 15088, + "executionTime": 60, + "id": "04f01223-4aa2-40df-bdab-37e5caa43cbe-u1", + "output": "Hello World", + "status": "COMPLETED", + "workerId": "uhbbfre73gqjwh" +} +``` + + +Congratulations! You've successfully deployed and tested your first Serverless endpoint. + + +## Next steps + + + + Create more advanced handler functions. + + + Add AI/ML models and other dependencies to your worker. + + + Learn how to structure and send requests to your endpoint. + + + Configure and manage your Serverless endpoints. + + diff --git a/serverless/quickstart.mdx b/serverless/quickstart.mdx index 7706b009..346fb9e4 100644 --- a/serverless/quickstart.mdx +++ b/serverless/quickstart.mdx @@ -1,237 +1,309 @@ --- title: "Quickstart" sidebarTitle: "Quickstart" -description: "Write a handler function, build a worker image, create an endpoint, and send your first request." +description: "Deploy your first Serverless endpoint in 5 minutes using a ready-to-use template." --- - +This quickstart gets you running a Serverless endpoint on Runpod in minutes, using a ready-to-use template to deploy a language model and send a test request. -For an even faster start, clone or download the [worker-basic](https://github.com/runpod-workers/worker-basic) repository for a pre-configured template for building and deploying Serverless workers. After cloning the repository, skip to [step 6 of this tutorial](#step-6%3A-build-and-push-your-docker-image) to deploy and test the endpoint. +## Requirements - +- A [Runpod account](/get-started/manage-accounts) with available credits. +- A [Runpod API key](/get-started/api-keys). -## Requirements +## Step 1: Set up your environment + +Choose your preferred method for interacting with Runpod. If using the CLI or REST API, you'll need to configure your API key. -* You've [created a Runpod account](/get-started/manage-accounts). -* You've installed [Python 3.x](https://www.python.org/downloads/) and [Docker](https://docs.docker.com/get-started/get-docker/) on your local machine and configured them for your command line. + + -## Step 1: Create project files +Install and configure the Runpod CLI. 
-Create a new directory with empty files for your project: +**macOS/Linux:** ```bash -mkdir serverless-quickstart && cd serverless-quickstart -touch handler.py Dockerfile requirements.txt test_input.json +# Install runpodctl +curl -fsSL https://install.runpod.io | bash + +# Configure with your API key +runpodctl doctor ``` -## Step 2: Install the Serverless SDK +**Windows:** -Create a virtual environment and install the Serverless SDK +```powershell +# Install using PowerShell +iwr -useb https://install.runpod.io/windows | iex + +# Configure with your API key +runpodctl doctor +``` + +Verify the installation: ```bash -python3 -m venv .venv -source .venv/bin/activate -pip install runpod -``` - -## Step 3: Create a handler function - -Add the following code to `handler.py`: - -```python handler.py -import runpod -import time - -def handler(event): -# This function processes incoming requests to your Serverless endpoint. -# -# Args: -# event (dict): Contains the input data and request metadata -# -# Returns: -# Any: The result to be returned to the client - - # Extract input data - print(f"Worker Start") - input = event['input'] - - prompt = input.get('prompt') - seconds = input.get('seconds', 0) - - print(f"Received prompt: {prompt}") - print(f"Sleeping for {seconds} seconds...") - - # You can replace this sleep call with your own Python code - time.sleep(seconds) - - return prompt - -# Start the Serverless function when the script is run -if __name__ == '__main__': - runpod.serverless.start({'handler': handler }) -``` - -This is a bare-bones handler that processes a JSON object and outputs a `prompt` string contained in the `input` object. +runpodctl version +``` - + + -You can replace the `time.sleep(seconds)` call with your own Python code for generating images, text, or running any AI/ML workload. +Export your Runpod API key as an environment variable: - +```bash +export RUNPOD_API_KEY="your_api_key_here" +``` -## Step 4: Create a test input file + + -Add the following code to `test_input.json` to properly test your handler locally: +No setup required. Log in to the [Runpod console](https://www.runpod.io/console/serverless) to get started. -```json test_input.json -{ - "input": { - "prompt": "Hey there!" - } -} -``` + + + +## Step 2: Deploy an endpoint -## Step 5: Test your handler function locally +Deploy a vLLM worker with a small, fast language model. -Run your handler function using your local terminal: + + -```sh -python handler.py +First, create a Serverless template with the vLLM worker image: + +```bash +runpodctl template create \ + --name "vllm-qwen" \ + --image "runpod/worker-v1-vllm:stable-cuda12.1.0" \ + --env '{"MODEL_NAME": "Qwen/Qwen2.5-0.5B-Instruct"}' \ + --serverless ``` -You should see output similar to this: +Note the template ID from the output. Then create an endpoint using that template: -```text ---- Starting Serverless Worker | Version 1.7.9 --- -INFO | Using test_input.json as job input. -DEBUG | Retrieved local job: {'input': {'prompt': 'Hey there!'}, 'id': 'local_test'} -INFO | local_test | Started. -Worker Start -Received prompt: Hey there! -Sleeping for 0 seconds... -DEBUG | local_test | Handler output: Hey there! -DEBUG | local_test | run_job return: {'output': 'Hey there!'} -INFO | Job local_test completed successfully. -INFO | Job result: {'output': 'Hey there!'} -INFO | Local testing complete, exiting. 
+```bash +runpodctl serverless create \ + --name "my-first-endpoint" \ + --template-id YOUR_TEMPLATE_ID \ + --gpu-id "NVIDIA GeForce RTX 4090" \ + --workers-min 0 \ + --workers-max 3 ``` -## Step 6: Create a Dockerfile +The output includes your endpoint ID: -Add the following content to `Dockerfile`: +``` +Endpoint created successfully +ID: abc123xyz +Name: my-first-endpoint +``` - -New to Dockerfiles? Learn the fundamentals with our [introduction to containers](/tutorials/introduction/containers) tutorial series. - + + -```dockerfile Dockerfile -FROM python:3.10-slim +First, create a template using the vLLM worker image: -WORKDIR / +```bash +curl --request POST \ + --url https://rest.runpod.io/v1/templates \ + --header "Authorization: Bearer $RUNPOD_API_KEY" \ + --header "Content-Type: application/json" \ + --data '{ + "name": "vllm-qwen", + "imageName": "runpod/worker-v1-vllm:stable-cuda12.1.0", + "isServerless": true, + "env": { + "MODEL_NAME": "Qwen/Qwen2.5-0.5B-Instruct" + } + }' +``` + +Note the `id` from the response. Then create an endpoint using that template: -# Install dependencies -RUN pip install --no-cache-dir runpod +```bash +curl --request POST \ + --url https://rest.runpod.io/v1/endpoints \ + --header "Authorization: Bearer $RUNPOD_API_KEY" \ + --header "Content-Type: application/json" \ + --data '{ + "name": "my-first-endpoint", + "templateId": "YOUR_TEMPLATE_ID", + "gpuTypeIds": ["NVIDIA GeForce RTX 4090", "NVIDIA L4", "NVIDIA RTX A4000"], + "workersMin": 0, + "workersMax": 3, + "idleTimeout": 5 + }' +``` -# Copy your handler file -COPY handler.py / +The response includes your endpoint ID: -# Start the container -CMD ["python3", "-u", "handler.py"] +```json +{ + "id": "abc123xyz", + "name": "my-first-endpoint", + ... +} ``` -## Step 7: Build and push your worker image + + - +1. Go to the [Serverless section](https://www.runpod.io/console/serverless) and click **New Endpoint**. +2. Under **The Hub**, click **vLLM**. +3. Click **Deploy vX.X.X**. +4. In the **Model** field, enter: `Qwen/Qwen2.5-0.5B-Instruct` +5. Click **Next** then **Create Endpoint**. +6. Once deployed, note your **Endpoint ID** from the endpoint details page—you'll need it for API requests. -Instead of building and pushing your image via Docker Hub, you can also [deploy your worker from a GitHub repository](/serverless/workers/github-integration). + + - +Your endpoint will begin initializing. This takes 1-2 minutes while Runpod provisions resources and loads the model. -Before you can deploy your worker on Runpod Serverless, you need to push it to Docker Hub: - - - Build your Docker image, specifying the platform for Runpod deployment, replacing `[YOUR_USERNAME]` with your Docker username: - - ```sh - docker build --platform linux/amd64 --tag [YOUR_USERNAME]/serverless-test . - ``` - - - - ```sh - docker push [YOUR_USERNAME]/serverless-test:latest - ``` - - - -## Step 8: Deploy your worker using the Runpod console - -To deploy your worker to a Serverless endpoint: - -1. Go to the [Serverless section](https://www.console.runpod.io/serverless) of the Runpod console. -2. Click **New Endpoint**. -3. Click **Import from Docker Registry** -4. In the **Container Image** field, enter your Docker image URL: `docker.io/yourusername/serverless-test:latest`. -5. Click **Next** to proceed to endpoint configuration. -6. Configure your endpoint settings: - * (Optional) Enter a custom name for your endpoint, or use the randomly generated name. - * Make sure the **Endpoint Type** is set to **Queue**. 
- * Under **GPU Configuration**, check the box for **16 GB** GPUs. - * Leave the rest of the settings at their defaults. -7. Click **Deploy Endpoint**. - -The system will redirect you to a dedicated detail page for your new endpoint. - -## Step 9: Test your endpoint - -To test your endpoint, click the **Requests** tab in the endpoint detail page: - - - - - -On the left you should see the default test request: +## Step 3: Send a request -```json -{ +Once your endpoint shows **Ready** status, send a test request. If you haven't already, export your API key in your terminal: + +```bash +export RUNPOD_API_KEY="your_api_key_here" +``` + + + + +Run this command in your terminal, replacing `YOUR_ENDPOINT_ID` with your actual endpoint ID: + +```bash +curl --request POST \ + --url "https://api.runpod.ai/v2/YOUR_ENDPOINT_ID/runsync" \ + --header "Authorization: Bearer $RUNPOD_API_KEY" \ + --header "Content-Type: application/json" \ + --data '{ "input": { - "prompt": "Hello World" + "prompt": "What is the capital of France?", + "max_tokens": 100 } -} + }' +``` + + + + +Create a file called `test_endpoint.py` and paste the following code: + +```python test_endpoint.py +import requests +import os + +ENDPOINT_ID = "YOUR_ENDPOINT_ID" # Replace with your endpoint ID +API_KEY = os.environ.get("RUNPOD_API_KEY") + +if not API_KEY: + raise ValueError("RUNPOD_API_KEY environment variable not set") + +response = requests.post( + f"https://api.runpod.ai/v2/{ENDPOINT_ID}/runsync", + headers={"Authorization": f"Bearer {API_KEY}"}, + json={ + "input": { + "prompt": "What is the capital of France?", + "max_tokens": 100 + } + } +) + +print(response.json()) +``` + +Install dependencies and run the script: + +```bash +pip install requests +python test_endpoint.py ``` -Leave the default input as is and click **Run**. The system will take a few minutes to initialize your workers. + + -When the workers finish processing your request, you should see output on the right side of the page similar to this: +You should receive a response like this: ```json { - "delayTime": 15088, - "executionTime": 60, - "id": "04f01223-4aa2-40df-bdab-37e5caa43cbe-u1", - "output": "Hello World", - "status": "COMPLETED", - "workerId": "uhbbfre73gqjwh" + "id": "sync-abc123-xyz", + "status": "COMPLETED", + "output": { + "text": "The capital of France is Paris.", + ... + } } ``` + +The first request may take 30-60 seconds as the worker loads the model into GPU memory. Subsequent requests will complete in just a few seconds until the worker scales down due to inactivity. + + +## Step 4: Clean up + +To avoid ongoing charges, delete your endpoint when you're done testing. + + + + +List your endpoints to find the ID: + +```bash +runpodctl serverless list +``` + +Delete the endpoint: + +```bash +runpodctl serverless delete YOUR_ENDPOINT_ID +``` + +Optionally, delete the template you created: + +```bash +runpodctl template delete YOUR_TEMPLATE_ID +``` + + + + +```bash +curl --request DELETE \ + --url "https://rest.runpod.io/v1/endpoints/YOUR_ENDPOINT_ID" \ + --header "Authorization: Bearer $RUNPOD_API_KEY" +``` + + + + +1. Go to the [Serverless section](https://www.runpod.io/console/serverless). +2. Click the three dots on your endpoint and select **Delete Endpoint**. +3. Type the endpoint name to confirm. + + + + -Congratulations! You've successfully deployed and tested your first Serverless endpoint. +You've successfully deployed and tested your first Serverless endpoint. ## Next steps - - Create more advanced handler functions. 
- - - Add AI/ML models and other dependencies to your worker. + + Create your own handler function and Docker image. - Learn how to structure and send requests to your endpoint. + Learn about sync, async, and streaming requests. + + + Configure scaling, timeouts, and GPU selection. - - Configure and manage your Serverless endpoints. + + Customize your vLLM deployment for different models. diff --git a/serverless/troubleshooting.mdx b/serverless/troubleshooting.mdx index d55a91d3..d39cdd5a 100644 --- a/serverless/troubleshooting.mdx +++ b/serverless/troubleshooting.mdx @@ -128,6 +128,23 @@ If your vLLM worker runs out of memory: | 404 Not Found | Wrong endpoint URL | Use the format `https://api.runpod.ai/v2/ENDPOINT_ID/openai/v1` | | Connection refused | Endpoint not ready | Wait for workers to initialize | +### Slow response times + +| Cause | Solution | +|-------|----------| +| Cold start | First request loads model into GPU memory. Subsequent requests are faster. | +| Underpowered GPU | Use datacenter GPUs (A100, H100) for large models in production. | +| High context length | Reduce prompt length or `max_tokens` value. | +| Queue delays | If `delayTime` is high, increase `workersMax` in endpoint settings. | + +### Garbled or incorrect output + +| Cause | Solution | +|-------|----------| +| Wrong chat template | Set `CUSTOM_CHAT_TEMPLATE` to match your model's expected format. | +| Missing tokenizer settings | Some models (e.g., Mistral) require specific tokenizer configuration. See [Configure vLLM](/serverless/vllm/configuration). | +| Truncated output | Increase `max_tokens` in your request. | + ## Load balancing endpoint issues ### "No workers available" error diff --git a/serverless/vllm/get-started.mdx b/serverless/vllm/get-started.mdx deleted file mode 100644 index 1f5a1491..00000000 --- a/serverless/vllm/get-started.mdx +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: "Deploy vLLM on Runpod Serverless" -sidebarTitle: "Quickstart" -description: "Create a Serverless endpoint to serve LLM inference via API request." ---- - -## Requirements - -* [Runpod account](/get-started/manage-accounts). -* [Runpod API key](/get-started/api-keys). -* (For gated models) [Hugging Face access token](https://huggingface.co/docs/hub/en/security-tokens). - -## Step 1: Choose a model - -First, decide which LLM you want to deploy. The vLLM worker supports most models available on Hugging Face, including: - -* Llama 3 (e.g., `meta-llama/Llama-3.2-3B-Instruct`). -* Mistral (e.g., `mistralai/Ministral-8B-Instruct-2410`). -* Qwen3 (e.g., `Qwen/Qwen3-8B`). -* OpenChat (e.g., `openchat/openchat-3.5-0106`). -* Gemma (e.g., `google/gemma-3-1b-it`). -* DeepSeek-R1 (e.g., `deepseek-ai/DeepSeek-R1-Distill-Qwen-7B`). -* Phi-4 (e.g., `microsoft/Phi-4-mini-instruct`). - -For this tutorial, we'll use `openchat/openchat-3.5-0106`, but you can substitute this with [any compatible model](https://docs.vllm.ai/en/latest/models/supported_models.html). - - -Depending on the model you choose, you may need to [configure your endpoint](/serverless/vllm/configuration) with additional environment variables. - - -## Step 2: Deploy using the Runpod UI - -The easiest way to deploy a vLLM worker is through Runpod's ready-to-deploy repos: - -1. Find the [vLLM repo](https://console.runpod.io/hub/runpod-workers/worker-vllm) in the Runpod Hub. -2. Click **Deploy**, using the latest vLLM worker version. -3. In the **Model** field, end the model name: `openchat/openchat-3.5-0106`. -4. Click **Advanced** to expand the vLLM settings. -5. 
Set **Max Model Length** to `8192` (or an appropriate context length for your model). -6. Leave other settings at their defaults unless you have specific requirements, then click **Next**. -7. Click **Create Endpoint** - -Your endpoint will now begin initializing. This may take several minutes while Runpod provisions resources and downloads the selected model. - - - -For more details on how to optimize your endpoint, see [Endpoint configurations](/serverless/endpoints/endpoint-configurations). - - - -## Step 3: Find your endpoint ID - -Once deployment is complete, make a note of your **Endpoint ID**, as you'll need this to make API requests. - - - - - -## Step 4: Send a test request using the UI - -To test your worker, click the **Requests** tab in the endpoint detail page: - - - - - -On the left you should see the default test request: - -```json -{ - "input": { - "prompt": "Hello World" - } -} -``` - -Leave the default input as is and click **Run**. The system will take a few minutes to initialize your workers. - -When the workers finish processing your request, you should see output on the right side of the page similar to this: - -```json -{ - "delayTime": 638, - "executionTime": 3344, - "id": "f0706ead-c5ec-4689-937c-e21d5fbbca47-u1", - "output": [ - { - "choices": [ - { - "tokens": ["CHAT_RESPONSE"] - } - ], - "usage": { - "input": 3, - "output": 100 - } - } - ], - "status": "COMPLETED", - "workerId": "0e7o8fgmm9xgty" -} -``` - -## Step 5: Send a test request using the API - -To send a test request using the API, use the following command, replacing `YOUR_ENDPOINT_ID` and `YOUR_API_KEY` with your actual endpoint ID and API key: - -```bash -curl -X POST "https://api.runpod.ai/v2/YOUR_ENDPOINT_ID/runsync" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{"input": {"prompt": "Hello World"}}' -``` - - -Congratulations! You've successfully deployed a vLLM worker on Runpod Serverless. You now have a powerful, scalable LLM inference API that's compatible with both the OpenAI client and Runpod's native API. - - -## Customize your deployment with environment variables (optional) - -If you need to customize your model deployment, you can edit your endpoint settings to add environment variables. Here are some useful environment variables you might want to set: - -* `MAX_MODEL_LEN`: Maximum context length (e.g., `16384`). -* `DTYPE`: Data type for model weights (`float16`, `bfloat16`, or `float32`). -* `GPU_MEMORY_UTILIZATION`: Controls VRAM usage (e.g., `0.95` for 95%). -* `CUSTOM_CHAT_TEMPLATE`: For models that need a custom chat template. -* `OPENAI_SERVED_MODEL_NAME_OVERRIDE`: Change the model name to use in OpenAI requests. - -To add or modify environment variables: - -1. Go to your endpoint details page. -2. Select **Manage**, then select **Edit Endpoint**. -3. Expand the **Public Environment Variables** section. -4. Add or edit your desired variables. -5. Click **Save Endpoint**. - -For a complete list of available environment variables, see the [vLLM environment variables reference](/serverless/vllm/environment-variables). - -You may also wish to adjust the input parameters for your request. For example, use the `max_tokens` parameter to increase the maximum number of tokens generated per response. To learn more, see [Send vLLM requests](/serverless/vllm/vllm-requests). 
- -## Troubleshooting - -If you encounter issues with your deployment: - -* **Worker fails to initialize**: Check that your model is compatible with vLLM and your GPU has enough VRAM. -* **Slow response times**: Consider using a more powerful GPU or optimizing your request parameters. -* **Out of memory errors**: Try increasing GPU size or reducing `MAX_MODEL_LEN`. -* **API errors**: Verify your endpoint ID and API key are correct. - -## Next steps - -* [Send requests using the Runpod API](/serverless/vllm/vllm-requests). -* [Learn about vLLM's OpenAI API compatibility](/serverless/vllm/openai-compatibility). -* [Customize your vLLM worker's handler function](/serverless/workers/handler-functions). -* [Build a custom worker for more specialized workloads](/serverless/workers/custom-worker). diff --git a/serverless/vllm/overview.mdx b/serverless/vllm/overview.mdx index 78babe63..620facd3 100644 --- a/serverless/vllm/overview.mdx +++ b/serverless/vllm/overview.mdx @@ -10,8 +10,8 @@ mode: "wide" vLLM workers deploy and serve large language models on Runpod Serverless with fast inference and automatic scaling. Deploy directly from the [Runpod Hub](https://console.runpod.io/hub/runpod-workers/worker-vllm) or customize using the [runpod-workers/worker-vllm](https://github.com/runpod-workers/worker-vllm) repository as a base. - - Deploy your first vLLM worker in minutes. + + Deploy your first vLLM endpoint in minutes. Configure your vLLM endpoint with environment variables. diff --git a/tests/TESTS.md b/tests/TESTS.md index 9ce2ac75..8fe8b5e3 100644 --- a/tests/TESTS.md +++ b/tests/TESTS.md @@ -65,8 +65,9 @@ Fast tests that don't require GPU deployments. Run these for quick validation. | sdk-python-install | Install the Runpod Python SDK | `import runpod` succeeds | | sdk-js-install | Install the Runpod JavaScript SDK | `require('runpod-sdk')` succeeds | | cli-install | Install runpodctl on your local machine | `runpodctl version` returns version | -| cli-configure | Configure runpodctl with your API key | `runpodctl config` shows configured key | -| cli-list-pods | List pods using runpodctl | `runpodctl get pods` returns list | +| cli-configure | Configure runpodctl with your API key | `runpodctl user` shows account info | +| cli-list-pods | List pods using runpodctl | `runpodctl pod list` returns list | +| cli-list-gpus | List available GPUs using runpodctl | `runpodctl gpu list` returns GPU types | | template-list | List all templates | API returns template array | | api-key-create | Create an API key with specific permissions | New API key ID returned | | pods-add-ssh-key | Add an SSH key to your Runpod account | Key appears in account | @@ -109,6 +110,7 @@ Run all smoke tests using local docs | ID | Goal | Expected Outcome | |----|------|------------------| +| serverless-quickstart | Complete the Serverless quickstart using runpodctl | Endpoint responds to /runsync request | | serverless-create-endpoint | Create a serverless endpoint | Endpoint ID returned | | serverless-serve-qwen | Create an endpoint to serve a Qwen model | Chat completion works | | serverless-custom-handler | Write a custom handler function and deploy it | Handler responds to request | @@ -226,11 +228,21 @@ Run all smoke tests using local docs | ID | Goal | Expected Outcome | |----|------|------------------| | cli-install | Install runpodctl on your local machine | `runpodctl version` returns version | -| cli-configure | Configure runpodctl with your API key | `runpodctl config` shows key | -| cli-list-pods | List pods 
using runpodctl | `runpodctl get pods` returns list | -| cli-create-pod | Create a pod using runpodctl | Pod ID returned | +| cli-doctor | Run first-time setup with runpodctl doctor | API key and SSH configured | +| cli-configure | Configure runpodctl with your API key | `runpodctl user` shows account info | +| cli-list-gpus | List available GPUs using runpodctl | `runpodctl gpu list` returns GPU types | +| cli-list-pods | List pods using runpodctl | `runpodctl pod list` returns list | +| cli-create-pod | Create a pod using runpodctl | `runpodctl pod create` returns Pod ID | +| cli-start-stop-pod | Start and stop a pod using runpodctl | `runpodctl pod start/stop` succeeds | +| cli-delete-pod | Delete a pod using runpodctl | `runpodctl pod delete` succeeds | +| cli-list-serverless | List serverless endpoints using runpodctl | `runpodctl serverless list` returns list | +| cli-create-serverless | Create a serverless endpoint using runpodctl | `runpodctl serverless create` returns endpoint ID | +| cli-list-templates | Search templates using runpodctl | `runpodctl template search` returns templates | +| cli-list-network-volumes | List network volumes using runpodctl | `runpodctl network-volume list` returns list | +| cli-hub-search | Search the Runpod Hub using runpodctl | `runpodctl hub search` returns results | | cli-send-file | Send a file to a Pod using runpodctl | File arrives on Pod | | cli-receive-file | Receive a file from a Pod using runpodctl | File downloaded locally | +| cli-billing | View billing history using runpodctl | `runpodctl billing` returns history | --- diff --git a/tutorials/introduction/containers.mdx b/tutorials/introduction/containers.mdx index 86a0269d..6178b2af 100644 --- a/tutorials/introduction/containers.mdx +++ b/tutorials/introduction/containers.mdx @@ -38,7 +38,7 @@ Key benefits include: Runpod uses containers extensively across its platform: -- **Serverless workers**: When you [deploy a Serverless endpoint](/serverless/quickstart), you provide a container image that defines how your [worker processes requests](/serverless/workers/overview). Your handler code runs inside the container, and Runpod automatically scales workers up and down based on demand. +- **Serverless workers**: When you [build a custom worker](/serverless/build-worker), you provide a container image that defines how your [worker processes requests](/serverless/workers/overview). Your handler code runs inside the container, and Runpod automatically scales workers up and down based on demand. - **Pods**: With [Pods](/pods/overview), you can bring your own container (BYOC) to [run long-running GPU workloads](/pods/manage-pods) like training, inference servers, or development environments. [Choose from pre-built templates](/pods/choose-a-pod) or deploy custom containers. - **Templates**: Runpod's [templates](/pods/templates/overview) are pre-configured container images optimized for specific tasks. You can [create custom templates](/pods/templates/create-custom-template) to standardize your container configurations across deployments. @@ -117,7 +117,7 @@ When you're ready to deploy containers on Runpod: - Learn about [Serverless workers](/serverless/workers/overview) for scalable, GPU-powered inference. - Review [creating Dockerfiles for Serverless](/serverless/workers/create-dockerfile) with Runpod-specific best practices. - Understand [endpoint configurations](/serverless/endpoints/endpoint-configurations) for optimizing performance and cost. 
-- Explore [deploying your first endpoint](/serverless/quickstart) to get started quickly. +- Explore [building a custom worker](/serverless/build-worker) to deploy your own code. **For Pods:** - Explore [Pods](/pods/overview) for long-running GPU workloads and development environments. diff --git a/tutorials/introduction/containers/create-dockerfiles.mdx b/tutorials/introduction/containers/create-dockerfiles.mdx index 332345ec..7e1a6166 100644 --- a/tutorials/introduction/containers/create-dockerfiles.mdx +++ b/tutorials/introduction/containers/create-dockerfiles.mdx @@ -234,6 +234,6 @@ Now that you can create Dockerfiles and build images, continue learning: - [Learn about data persistence](/tutorials/introduction/containers/persist-data) with Docker volumes. **Deploy on Runpod:** -- For Serverless: [Deploy your first endpoint](/serverless/quickstart) and learn about [worker deployment](/serverless/workers/deploy). +- For Serverless: [Build a custom worker](/serverless/build-worker) and learn about [worker deployment](/serverless/workers/deploy). - For Pods: [Run your first Pod](/tutorials/pods/run-your-first) and explore [connecting to Pods](/pods/connect-to-a-pod). - Review [creating Dockerfiles for Serverless](/serverless/workers/create-dockerfile) with production best practices. diff --git a/tutorials/serverless/run-your-first.mdx b/tutorials/serverless/run-your-first.mdx index 73bc70c1..870a82dc 100644 --- a/tutorials/serverless/run-your-first.mdx +++ b/tutorials/serverless/run-your-first.mdx @@ -207,4 +207,4 @@ Now that you've learned how to generate images with Serverless, consider explori - Learn how to create [synchronous requests](/serverless/endpoints/operations) using the `/runsync` endpoint for faster responses. - Explore [endpoint configurations](/serverless/endpoints/endpoint-configurations) to optimize performance and cost. - Discover how to [send requests](/serverless/endpoints/send-requests) with advanced parameters and webhook notifications. -- Try deploying your own [custom worker](/serverless/quickstart) for specialized AI models. +- Try deploying your own [custom worker](/serverless/build-worker) for specialized AI models.