From d2b73cc14b7c4ac4ae55f67c8ab13462d9a4db5f Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 16:51:22 -0500
Subject: [PATCH 01/51] split uploader into a separate class

---
 .../lib/cli/api/beta/jig/_uploader.py         | 280 ++++++++++++++++++
 src/together/lib/cli/api/beta/jig/volumes.py  | 266 +----------------
 2 files changed, 281 insertions(+), 265 deletions(-)
 create mode 100644 src/together/lib/cli/api/beta/jig/_uploader.py

diff --git a/src/together/lib/cli/api/beta/jig/_uploader.py b/src/together/lib/cli/api/beta/jig/_uploader.py
new file mode 100644
index 00000000..58e28d38
--- /dev/null
+++ b/src/together/lib/cli/api/beta/jig/_uploader.py
@@ -0,0 +1,280 @@
+"""File upload helpers for jig volumes."""
+
+from __future__ import annotations
+
+import time
+import asyncio
+import itertools
+from typing import Any
+from pathlib import Path
+
+import click
+import httpx
+
+from together import Together
+from together.lib.cli.api.beta.jig._config import (
+    DEBUG,
+    MAX_UPLOAD_RETRIES,
+    MULTIPART_THRESHOLD_MB,
+    MULTIPART_CHUNK_SIZE_MB,
+    UPLOAD_CONCURRENCY_LIMIT,
+)
+
+
+# --- File upload ---
+
+
+def format_filename(filename: str, max_len: int = 100) -> str:
+    if len(filename) <= max_len:
+        return filename
+    return "..." + filename[-(max_len - 3) :]
+
+
+class Uploader:
+    """Helper to handle file upload"""
+
+    chunk_size = MULTIPART_CHUNK_SIZE_MB * 1024 * 1024
+    multipart_threshold = MULTIPART_THRESHOLD_MB * 1024 * 1024
+    spinner_chars = "|/-\\"
+
+    def __init__(self, client: Together) -> None:
+        self.client = client
+        # progress
+        self.start_time = time.time()
+        self.completed_files = 0
+        self.uploaded_bytes = 0
+        self.current_file = ""
+        self.total_bytes = 0
+        self.total_files = 0
+        # cycle through spinner chars forever
+        self.spinner_running = True
+        self.spinner_iter = itertools.cycle(self.spinner_chars)
+        # these will be set in upload_files when event loop is running
+        self.semaphore: asyncio.Semaphore
+        self.progress_lock: asyncio.Lock
+        self.http_client: httpx.AsyncClient
+
+    def update_progress(self) -> None:
+        spinner = next(self.spinner_iter)
+
+        bytes_denominator = self.total_bytes or float("inf")
+        percent = int(100 * self.uploaded_bytes / bytes_denominator)
+
+        display_file = format_filename(self.current_file)
+
+        uploaded_mb = self.uploaded_bytes / (1024 * 1024)
+        total_mb = self.total_bytes / (1024 * 1024)
+        size_str = f"({uploaded_mb:.1f}MB/{total_mb:.1f}MB)"
+
+        elapsed = time.time() - self.start_time
+        speed_str = ""
+        if elapsed > 0.5 and self.uploaded_bytes > 0:
+            speed_kbps = self.uploaded_bytes / elapsed / 1024
+            speed_str = f"{speed_kbps:.1f} KB/s - "
+            if speed_kbps > 1024:
+                speed_str = f"{(speed_kbps / 1024):.1f} MB/s - "
+
+        msg = f"{spinner} {percent}% - {speed_str}{display_file} {size_str} ({self.completed_files}/{self.total_files} files)"
+
+        # \r moves cursor to start of line, \033[K clears from cursor to end of line
+        print(f"\r{msg}\033[K", end="", flush=True)  # noqa: T201
+
+    async def increment_progress(self, bytes_count: int, filename: str = "", file_complete: bool = False) -> None:
+        async with self.progress_lock:
+            if bytes_count > 0:
+                self.uploaded_bytes += bytes_count
+                if DEBUG:
+                    click.echo(f"\nDEBUG: bytes_count={bytes_count}, total={self.uploaded_bytes}")
+            if file_complete:
+                self.completed_files += 1
+            if filename:
+                self.current_file = filename
+            self.update_progress()
+
+    async def spinner_updater(self) -> None:
+        while 
self.spinner_running: + async with self.progress_lock: + self.update_progress() + await asyncio.sleep(0.1) + + async def upload_files(self, source_path: Path, volume_name: str) -> None: + """Upload all files from source directory with progress tracking""" + # these require a running event loop + self.semaphore = asyncio.Semaphore(UPLOAD_CONCURRENCY_LIMIT) + self.progress_lock = asyncio.Lock() + + source_prefix = f"{volume_name}/{source_path.name}" + files_to_upload: list[tuple[Path, str, int]] = [] + + for file_path in source_path.rglob("*"): + if file_path.is_file(): + rel_path = file_path.relative_to(source_path) + remote_path = f"{source_prefix}/{rel_path.as_posix()}" + file_size = file_path.stat().st_size + files_to_upload.append((file_path, remote_path, file_size)) + + if not files_to_upload: + raise ValueError(f"No files found in {source_path}") + + files_to_upload.sort(key=lambda x: x[2], reverse=True) + + self.total_bytes = sum(size for _, _, size in files_to_upload) + self.total_files = len(files_to_upload) + spinner_task = asyncio.create_task(self.spinner_updater()) + async with httpx.AsyncClient(timeout=300.0) as self.http_client: + try: + tasks = [self.upload_file_with_retry(fp, rp, fs) for fp, rp, fs in files_to_upload] + await asyncio.gather(*tasks) + finally: + self.spinner_running = False + await spinner_task + + elapsed_time = time.time() - self.start_time + click.echo(f"\n\N{CHECK MARK} Upload completed in {elapsed_time:.1f} seconds") + + async def upload_file_with_retry(self, file_path: Path, remote_path: str, file_size: int) -> None: + for attempt in range(MAX_UPLOAD_RETRIES): + # Snapshot progress before attempt + async with self.progress_lock: + snapshot_bytes = self.uploaded_bytes + + try: + if file_size >= self.multipart_threshold: + await self._upload_file_multipart(file_path, remote_path, file_size) + else: + await self._upload_file_simple(file_path, remote_path, file_size) + return + except Exception as e: + # Rollback to snapshot on failure + async with self.progress_lock: + self.uploaded_bytes = snapshot_bytes + if attempt == MAX_UPLOAD_RETRIES - 1: + raise RuntimeError( + f"Failed to upload {remote_path} after {MAX_UPLOAD_RETRIES} attempts: {e}" + ) from e + await asyncio.sleep(1 * (attempt + 1)) + + async def _upload_file_simple( + self, + file_path: Path, + remote_path: str, + file_size: int, + ) -> None: + """Upload a single file using simple upload""" + async with self.semaphore: + response = self.client._client.post( + "/storage/upload-request", + json={"filename": remote_path}, + headers=self.client.auth_headers, + ) + response.raise_for_status() + upload_data = response.json() + + upload_url = upload_data["upload_url"]["url"] + method = upload_data["upload_url"]["method"] + headers = upload_data["upload_url"].get("headers", {}) + + file_data = await asyncio.to_thread(Path(file_path).read_bytes) + + try: + resp = await self.http_client.request(method, upload_url, content=file_data, headers=headers) + resp.raise_for_status() + except Exception as e: + raise RuntimeError(f"Failed to upload {remote_path}: {e}") from e + + await self.increment_progress(max(file_size, 1), remote_path, file_complete=True) + + async def _upload_file_multipart( + self, + file_path: Path, + remote_path: str, + file_size: int, + ) -> None: + """Upload a file using multipart upload""" + parts_count = (file_size + self.chunk_size - 1) // self.chunk_size + + response = self.client._client.post( + "/storage/multipart/init", + json={"filename": remote_path, "parts_count": parts_count}, 
+ headers=self.client.auth_headers, + ) + response.raise_for_status() + init_data = response.json() + + upload_id = init_data["upload_id"] + part_urls = init_data["part_upload_urls"] + + try: + completed_parts = await self._upload_parts(file_path, part_urls) + + self.client._client.post( + "/storage/multipart/complete", + json={ + "filename": remote_path, + "upload_id": upload_id, + "parts": completed_parts, + }, + headers=self.client.auth_headers, + ) + + await self.increment_progress(0, remote_path, file_complete=True) + except Exception: + try: + self.client._client.post( + "/storage/multipart/abort", + json={"filename": remote_path, "upload_id": upload_id}, + headers=self.client.auth_headers, + ) + except Exception as e: + click.echo(f"Failed to abort multipart upload request: {repr(e)}") + raise + + async def _upload_parts( + self, + file_path: Path, + part_urls: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + """Upload file parts concurrently""" + + async def upload_part(part_info: dict[str, Any], data: bytes) -> dict[str, Any]: + err = None + async with self.semaphore: + part_number = part_info["part_number"] + url = part_info["url"] + method = part_info["method"] + headers = part_info.get("headers", {}) + + part_size = len(data) + + for attempt in range(MAX_UPLOAD_RETRIES): + try: + response = await self.http_client.request(method, url, content=data, headers=headers) + response.raise_for_status() + etag = response.headers.get("ETag", "").strip('"') + await self.increment_progress( + part_size, + f"{file_path.name} (part {part_number}/{len(part_urls)})", + ) + return {"part_number": part_number, "etag": etag} + except Exception as e: + err = e + if attempt < MAX_UPLOAD_RETRIES - 1: + await asyncio.sleep(1 * (attempt + 1)) + raise RuntimeError(f"Failed to upload part {part_number}: {err}") + + with open(file_path, "rb") as f: + tasks = [ + asyncio.create_task( + upload_part( + part_info=part_info, + # read file sequentially while uploads proceed + data=await asyncio.to_thread(f.read, self.chunk_size), + ) + ) + for part_info in part_urls + ] + + completed_parts = await asyncio.gather(*tasks) + return sorted(completed_parts, key=lambda x: x["part_number"]) + + diff --git a/src/together/lib/cli/api/beta/jig/volumes.py b/src/together/lib/cli/api/beta/jig/volumes.py index c1f58fd8..bcc6301c 100644 --- a/src/together/lib/cli/api/beta/jig/volumes.py +++ b/src/together/lib/cli/api/beta/jig/volumes.py @@ -3,25 +3,15 @@ from __future__ import annotations import json -import time import asyncio -import itertools -from typing import Any from pathlib import Path import click -import httpx from together import Together from together._exceptions import APIStatusError from together.lib.cli.api._utils import handle_api_errors -from together.lib.cli.api.beta.jig._config import ( - DEBUG, - MAX_UPLOAD_RETRIES, - MULTIPART_THRESHOLD_MB, - MULTIPART_CHUNK_SIZE_MB, - UPLOAD_CONCURRENCY_LIMIT, -) +from together.lib.cli.api.beta.jig._uploader import Uploader @click.group() @@ -34,260 +24,6 @@ def volumes(ctx: click.Context) -> None: # --- File upload --- -def format_filename(filename: str, max_len: int = 100) -> str: - if len(filename) <= max_len: - return filename - return "..." 
+ filename[-(max_len - 3) :] - - -class Uploader: - """Helper to handle file upload""" - - chunk_size = MULTIPART_CHUNK_SIZE_MB * 1024 * 1024 - multipart_threshold = MULTIPART_THRESHOLD_MB * 1024 * 1024 - spinner_chars = "|/-\\" - - def __init__(self, client: Together) -> None: - self.client = client - # progress - self.start_time = time.time() - self.completed_files = 0 - self.uploaded_bytes = 0 - self.current_file = "" - self.total_bytes = 0 - self.total_files = 0 - # cycle through spinner chars forever - self.spinner_running = True - self.spinner_iter = itertools.cycle("|/-\\") - # these will be set in upload_files when event loop is running - self.semaphore: asyncio.Semaphore - self.progress_lock: asyncio.Lock - self.http_client: httpx.AsyncClient - - def update_progress(self) -> None: - spinner = next(self.spinner_iter) - - bytes_denominator = self.total_bytes or float("inf") - percent = int(100 * self.uploaded_bytes / bytes_denominator) - - display_file = format_filename(self.current_file) - - uploaded_mb = self.uploaded_bytes / (1024 * 1024) - total_mb = self.total_bytes / (1024 * 1024) - size_str = f"({uploaded_mb:.1f}MB/{total_mb:.1f}MB)" - - elapsed = time.time() - self.start_time - speed_str = "" - if elapsed > 0.5 and self.uploaded_bytes > 0: - speed_kbps = self.uploaded_bytes / elapsed / 1024 - speed_str = f"{speed_kbps:.1f} KB/s - " - if speed_kbps > 1024: - speed_str = f"{(speed_kbps / 1024):.1f} MB/s - " - - msg = f"\r{spinner} {percent}% - {speed_str}{display_file} {size_str} ({self.completed_files}/{self.total_files} files)" - - # \r moves cursor to start of line, \033[K clears from cursor to end of line - print(f"\r{msg}\033[K", end="", flush=True) # noqa: T201 - - async def increment_progress(self, bytes_count: int, filename: str = "", file_complete: bool = False) -> None: - async with self.progress_lock: - if bytes_count > 0: - self.uploaded_bytes += bytes_count - if DEBUG: - click.echo(f"\nDEBUG: bytes_count={bytes_count}, total={self.uploaded_bytes}") - if file_complete: - self.completed_files += 1 - if filename: - self.current_file = filename - self.update_progress() - - async def spinner_updater(self) -> None: - while self.spinner_running: - async with self.progress_lock: - self.update_progress() - await asyncio.sleep(0.1) - - async def upload_files(self, source_path: Path, volume_name: str) -> None: - """Upload all files from source directory with progress tracking""" - # these require a running event loop - self.semaphore = asyncio.Semaphore(UPLOAD_CONCURRENCY_LIMIT) - self.progress_lock = asyncio.Lock() - - source_prefix = f"{volume_name}/{source_path.name}" - files_to_upload: list[tuple[Path, str, int]] = [] - - for file_path in source_path.rglob("*"): - if file_path.is_file(): - rel_path = file_path.relative_to(source_path) - remote_path = f"{source_prefix}/{rel_path.as_posix()}" - file_size = file_path.stat().st_size - files_to_upload.append((file_path, remote_path, file_size)) - - if not files_to_upload: - raise ValueError(f"No files found in {source_path}") - - files_to_upload.sort(key=lambda x: x[2], reverse=True) - - self.total_bytes = sum(size for _, _, size in files_to_upload) - self.total_files = len(files_to_upload) - spinner_task = asyncio.create_task(self.spinner_updater()) - async with httpx.AsyncClient(timeout=300.0) as self.http_client: - try: - tasks = [self.upload_file_with_retry(fp, rp, fs) for fp, rp, fs in files_to_upload] - await asyncio.gather(*tasks) - finally: - self.spinner_running = False - await spinner_task - - elapsed_time = 
time.time() - self.start_time - click.echo(f"\n\N{CHECK MARK} Upload completed in {elapsed_time:.1f} seconds") - - async def upload_file_with_retry(self, file_path: Path, remote_path: str, file_size: int) -> None: - for attempt in range(MAX_UPLOAD_RETRIES): - # Snapshot progress before attempt - async with self.progress_lock: - snapshot_bytes = self.uploaded_bytes - - try: - if file_size >= self.multipart_threshold: - await self._upload_file_multipart(file_path, remote_path, file_size) - else: - await self._upload_file_simple(file_path, remote_path, file_size) - return - except Exception as e: - # Rollback to snapshot on failure - async with self.progress_lock: - self.uploaded_bytes = snapshot_bytes - if attempt == MAX_UPLOAD_RETRIES - 1: - raise RuntimeError( - f"Failed to upload {remote_path} after {MAX_UPLOAD_RETRIES} attempts: {e}" - ) from e - await asyncio.sleep(1 * (attempt + 1)) - - async def _upload_file_simple( - self, - file_path: Path, - remote_path: str, - file_size: int, - ) -> None: - """Upload a single file using simple upload""" - async with self.semaphore: - response = self.client._client.post( - "/storage/upload-request", - json={"filename": remote_path}, - headers=self.client.auth_headers, - ) - response.raise_for_status() - upload_data = response.json() - - upload_url = upload_data["upload_url"]["url"] - method = upload_data["upload_url"]["method"] - headers = upload_data["upload_url"].get("headers", {}) - - file_data = await asyncio.to_thread(Path(file_path).read_bytes) - - try: - resp = await self.http_client.request(method, upload_url, content=file_data, headers=headers) - resp.raise_for_status() - except Exception as e: - raise RuntimeError(f"Failed to upload {remote_path}: {e}") from e - - await self.increment_progress(max(file_size, 1), remote_path, file_complete=True) - - async def _upload_file_multipart( - self, - file_path: Path, - remote_path: str, - file_size: int, - ) -> None: - """Upload a file using multipart upload""" - parts_count = (file_size + self.chunk_size - 1) // self.chunk_size - - response = self.client._client.post( - "/storage/multipart/init", - json={"filename": remote_path, "parts_count": parts_count}, - headers=self.client.auth_headers, - ) - response.raise_for_status() - init_data = response.json() - - upload_id = init_data["upload_id"] - part_urls = init_data["part_upload_urls"] - - try: - completed_parts = await self._upload_parts(file_path, part_urls) - - self.client._client.post( - "/storage/multipart/complete", - json={ - "filename": remote_path, - "upload_id": upload_id, - "parts": completed_parts, - }, - headers=self.client.auth_headers, - ) - - await self.increment_progress(0, remote_path, file_complete=True) - except Exception: - try: - self.client._client.post( - "/storage/multipart/abort", - json={"filename": remote_path, "upload_id": upload_id}, - headers=self.client.auth_headers, - ) - except Exception as e: - click.echo(f"Failed to abort multipart upload request: {repr(e)}") - raise - - async def _upload_parts( - self, - file_path: Path, - part_urls: list[dict[str, Any]], - ) -> list[dict[str, Any]]: - """Upload file parts concurrently""" - - async def upload_part(part_info: dict[str, Any], data: bytes) -> dict[str, Any]: - err = None - async with self.semaphore: - part_number = part_info["part_number"] - url = part_info["url"] - method = part_info["method"] - headers = part_info.get("headers", {}) - - part_size = len(data) - - for attempt in range(MAX_UPLOAD_RETRIES): - try: - response = await 
self.http_client.request(method, url, content=data, headers=headers) - response.raise_for_status() - etag = response.headers.get("ETag", "").strip('"') - await self.increment_progress( - part_size, - f"{file_path.name} (part {part_number}/{len(part_urls)})", - ) - return {"part_number": part_number, "etag": etag} - except Exception as e: - err = e - if attempt < MAX_UPLOAD_RETRIES - 1: - await asyncio.sleep(1 * (attempt + 1)) - raise RuntimeError(f"Failed to upload part {part_number}: {err}") - - with open(file_path, "rb") as f: - tasks = [ - asyncio.create_task( - upload_part( - part_info=part_info, - # read file sequentially while uploads proceed - data=await asyncio.to_thread(f.read, self.chunk_size), - ) - ) - for part_info in part_urls - ] - - completed_parts = await asyncio.gather(*tasks) - return sorted(completed_parts, key=lambda x: x["part_number"]) - - async def _create_volume(client: Together, name: str, source: str) -> None: """Create a volume and upload files""" source_path = Path(source) From 828cec4d0e10767594235a9f512c4f3e63f9dfed Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 16:55:04 -0500 Subject: [PATCH 02/51] inline secrets into jig --- src/together/lib/cli/api/beta/jig/jig.py | 148 +++++++++++++++++-- src/together/lib/cli/api/beta/jig/secrets.py | 138 ----------------- 2 files changed, 136 insertions(+), 150 deletions(-) delete mode 100644 src/together/lib/cli/api/beta/jig/secrets.py diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index f540ee35..a69ad3c1 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -3,30 +3,23 @@ from __future__ import annotations import json -import time import shlex import shutil import subprocess +import time +from dataclasses import asdict from enum import Enum -from typing import Any, Callable, Optional from pathlib import Path -from dataclasses import asdict +from typing import Any, Callable, Optional from urllib.parse import urlparse import click - from together import Together from together._exceptions import APIStatusError from together.lib.cli.api._utils import handle_api_errors -from together.types.beta.deployment import Deployment +from together.lib.cli.api.beta.jig._config import DEBUG, WARMUP_DEST, WARMUP_ENV_NAME, Config, State from together.lib.cli.api.beta.jig._utils import format_deployment_status -from together.lib.cli.api.beta.jig._config import ( - DEBUG, - WARMUP_DEST, - WARMUP_ENV_NAME, - State, - Config, -) +from together.types.beta.deployment import Deployment from together.types.beta.jig.queue_submit_response import QueueSubmitResponse # Managed dockerfile marker - if this is the first line, jig will regenerate the file @@ -932,3 +925,134 @@ def list_deployments(ctx: click.Context) -> None: client: Together = ctx.obj response = client.beta.jig.with_raw_response.list() click.echo(json.dumps(response.json(), indent=2)) + + +# --- Secrets -- + + +@click.group() +@click.pass_context +def secrets(ctx: click.Context) -> None: + """Manage deployment secrets""" + pass + + +@secrets.command("set") +@click.pass_context +@click.option("--name", required=True, help="Secret name") +@click.option("--value", required=True, help="Secret value") +@click.option("--description", default="", help="Secret description") +@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") +@handle_api_errors("Secrets") +def secrets_set( + ctx: click.Context, + name: str, + value: str, + 
description: str,
+    config_path: str | None,
+) -> None:
+    """Set a secret (create or update)"""
+    client: Together = ctx.obj
+    config = Config.find(config_path)
+    state = State.load(config._path.parent, config.model_name)
+
+    deployment_secret_name = f"{config.model_name}-{name}"
+
+    try:
+        client.beta.jig.secrets.retrieve(deployment_secret_name)
+        # Secret exists, update it
+        client.beta.jig.secrets.update(
+            deployment_secret_name,
+            name=deployment_secret_name,
+            description=description,
+            value=value,
+        )
+        click.echo(f"\N{CHECK MARK} Updated secret: '{name}'")
+    except APIStatusError as e:
+        if hasattr(e, "status_code") and e.status_code == 404:
+            click.echo("\N{ROCKET} Creating new secret")
+            client.beta.jig.secrets.create(
+                name=deployment_secret_name,
+                value=value,
+                description=description,
+            )
+            click.echo(f"\N{CHECK MARK} Created secret: {name}")
+        else:
+            raise
+
+    state.secrets[name] = deployment_secret_name
+    state.save()
+
+
+@secrets.command("unset")
+@click.pass_context
+@click.option("--name", required=True, help="Secret name to remove")
+@click.option("-c", "--config", "config_path", default=None, help="Configuration file path")
+@handle_api_errors("Secrets")
+def secrets_unset(
+    ctx: click.Context,  # noqa: ARG001
+    name: str,
+    config_path: str | None,
+) -> None:
+    """Remove a secret from local state"""
+    config = Config.find(config_path)
+    state = State.load(config._path.parent, config.model_name)
+
+    if state.secrets.pop(name, ""):
+        state.save()
+        click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state")
+    else:
+        click.echo(f"\N{CROSS MARK} Secret '{name}' is not set")
+
+
+@secrets.command("list")
+@click.pass_context
+@click.option("-c", "--config", "config_path", default=None, help="Configuration file path")
+@handle_api_errors("Secrets")
+def secrets_list(
+    ctx: click.Context,
+    config_path: str | None,
+) -> None:
+    """List all secrets with sync status"""
+    client: Together = ctx.obj
+    config = Config.find(config_path)
+    state = State.load(config._path.parent, config.model_name)
+
+    prefix = f"{config.model_name}-"
+
+    # Get remote secrets for this deployment
+    remote_response = client.beta.jig.secrets.list()
+    remote_secrets: set[str] = set()
+
+    if hasattr(remote_response, "data") and remote_response.data:
+        for secret in remote_response.data:
+            secret_name = getattr(secret, "name", None)
+            if secret_name and secret_name.startswith(prefix):
+                # Strip prefix to get local name
+                remote_secrets.add(secret_name[len(prefix) :])
+
+    # Get local secrets
+    local_secrets = set(state.secrets.keys())
+
+    # Combine all secrets
+    all_secrets = local_secrets | remote_secrets
+
+    if not all_secrets:
+        click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{config.model_name}'")
+        return
+
+    click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{config.model_name}':")
+    click.echo()
+
+    for name in sorted(all_secrets):
+        in_local = name in local_secrets
+        in_remote = name in remote_secrets
+
+        if in_local and in_remote:
+            status = click.style("synced", fg="green")
+        elif in_local and not in_remote:
+            status = click.style("local only", fg="yellow")
+        else:  # in_remote and not in_local
+            status = click.style("remote only", fg="yellow")
+
+        click.echo(f"  - {name} [{status}]")
diff --git a/src/together/lib/cli/api/beta/jig/secrets.py b/src/together/lib/cli/api/beta/jig/secrets.py
deleted file mode 100644
index 993b588e..00000000
--- a/src/together/lib/cli/api/beta/jig/secrets.py
+++ /dev/null
@@ -1,138 +0,0 @@
-"""Secrets management CLI commands for jig.""" - -from __future__ import annotations - -import click - -from together import Together -from together._exceptions import APIStatusError -from together.lib.cli.api._utils import handle_api_errors -from together.lib.cli.api.beta.jig._config import State, Config - - -@click.group() -@click.pass_context -def secrets(ctx: click.Context) -> None: - """Manage deployment secrets""" - pass - - -@secrets.command("set") -@click.pass_context -@click.option("--name", required=True, help="Secret name") -@click.option("--value", required=True, help="Secret value") -@click.option("--description", default="", help="Secret description") -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_set( - ctx: click.Context, - name: str, - value: str, - description: str, - config_path: str | None, -) -> None: - """Set a secret (create or update)""" - client: Together = ctx.obj - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - - deployment_secret_name = f"{config.model_name}-{name}" - - try: - client.beta.jig.secrets.retrieve(deployment_secret_name) - # Secret exists, update it - client.beta.jig.secrets.update( - deployment_secret_name, - name=deployment_secret_name, - description=description, - value=value, - ) - click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo("\N{ROCKET} Creating new secret") - client.beta.jig.secrets.create( - name=deployment_secret_name, - value=value, - description=description, - ) - click.echo(f"\N{CHECK MARK} Created secret: {name}") - else: - raise - - state.secrets[name] = deployment_secret_name - state.save() - - -@secrets.command("unset") -@click.pass_context -@click.option("--name", required=True, help="Secret name to remove") -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_unset( - ctx: click.Context, # noqa: ARG001 - name: str, - config_path: str | None, -) -> None: - """Remove a secret from both remote and local state""" - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - - if state.secrets.pop(name, ""): - state.save() - click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state") - else: - click.echo(f"\N{CROSS MARK} Secret '{name}' is not set") - - -@secrets.command("list") -@click.pass_context -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_list( - ctx: click.Context, - config_path: str | None, -) -> None: - """List all secrets with sync status""" - client: Together = ctx.obj - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - - prefix = f"{config.model_name}-" - - # Get remote secrets for this deployment - remote_response = client.beta.jig.secrets.list() - remote_secrets: set[str] = set() - - if hasattr(remote_response, "data") and remote_response.data: - for secret in remote_response.data: - secret_name = getattr(secret, "name", None) - if secret_name and secret_name.startswith(prefix): - # Strip prefix to get local name - remote_secrets.add(secret_name[len(prefix) :]) - - # Get local secrets - local_secrets = set(state.secrets.keys()) - - # Combine all secrets - all_secrets = local_secrets | remote_secrets - - if not 
all_secrets:
-        click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{config.model_name}'")
-        return
-
-    click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{config.model_name}':")
-    click.echo()
-
-    for name in sorted(all_secrets):
-        in_local = name in local_secrets
-        in_remote = name in remote_secrets
-
-        if in_local and in_remote:
-            status = click.style("synced", fg="green")
-        elif in_local and not in_remote:
-            status = click.style("local only", fg="yellow")
-        else:  # in_remote and not in_local
-            status = click.style("remote only", fg="yellow")
-
-        click.echo(f"  - {name} [{status}]")

From fbd016d3b1442931f3d9be24dfcf36f5d2d497a5 Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 16:55:59 -0500
Subject: [PATCH 03/51] inline volumes into jig

---
 src/together/lib/cli/api/beta/jig/jig.py     | 161 +++++++++++++++++
 src/together/lib/cli/api/beta/jig/volumes.py | 170 ------------------
 2 files changed, 161 insertions(+), 170 deletions(-)
 delete mode 100644 src/together/lib/cli/api/beta/jig/volumes.py

diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py
index a69ad3c1..e3a5de85 100644
--- a/src/together/lib/cli/api/beta/jig/jig.py
+++ b/src/together/lib/cli/api/beta/jig/jig.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
 import shlex
 import shutil
@@ -18,6 +19,7 @@
 from together._exceptions import APIStatusError
 from together.lib.cli.api._utils import handle_api_errors
 from together.lib.cli.api.beta.jig._config import DEBUG, WARMUP_DEST, WARMUP_ENV_NAME, Config, State
+from together.lib.cli.api.beta.jig._uploader import Uploader
 from together.lib.cli.api.beta.jig._utils import format_deployment_status
 from together.types.beta.deployment import Deployment
 from together.types.beta.jig.queue_submit_response import QueueSubmitResponse
@@ -1056,3 +1058,162 @@ def secrets_list(
             status = click.style("remote only", fg="yellow")
 
         click.echo(f"  - {name} [{status}]")
+
+
+# --- Volumes ---
+
+
+@click.group()
+@click.pass_context
+def volumes(ctx: click.Context) -> None:
+    """Manage volumes"""
+    pass
+
+
+# --- File upload ---
+
+
+async def _create_volume(client: Together, name: str, source: str) -> None:
+    """Create a volume and upload files"""
+    source_path = Path(source)
+    if not source_path.exists():
+        raise ValueError(f"Source path does not exist: {source}")
+    if not source_path.is_dir():
+        raise ValueError(f"Source path must be a directory: {source}")
+
+    source_prefix = f"{name}/{source_path.name}"
+
+    click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'")
+    try:
+        volume_response = client.beta.jig.volumes.create(
+            name=name,
+            type="readOnly",
+            content={"type": "files", "source_prefix": source_prefix},
+        )
+        click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}")
+    except Exception as e:
+        raise RuntimeError(f"Failed to create volume: {e}") from e
+
+    try:
+        await Uploader(client).upload_files(source_path, volume_name=name)
+    except Exception as e:
+        click.echo(f"\N{CROSS MARK} Upload failed: {e}")
+        click.echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'")
+        try:
+            client.beta.jig.volumes.delete(name)
+        except Exception as cleanup_error:
+            click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}")
+        raise
+
+
+async def _update_volume(client: Together, name: str, source: str) -> None:
+    """Update a volume and re-upload files"""
+    source_path = Path(source)
+    if not 
source_path.exists(): + raise ValueError(f"Source path does not exist: {source}") + if not source_path.is_dir(): + raise ValueError(f"Source path must be a directory: {source}") + + try: + client.beta.jig.volumes.retrieve(name) + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + raise ValueError(f"Volume '{name}' does not exist") from e + raise + + source_prefix = f"{name}/{source_path.name}" + + click.echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") + await Uploader(client).upload_files(source_path, volume_name=name) + + click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") + client.beta.jig.volumes.update( + name, + content={"type": "files", "source_prefix": source_prefix}, + ) + click.echo("\N{CHECK MARK} Volume updated successfully") + + +# --- CLI Commands --- + + +@volumes.command("create") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@click.option("--source", required=True, help="Source directory path") +@handle_api_errors("Volumes") +def volumes_create( + ctx: click.Context, + name: str, + source: str, +) -> None: + """Create a volume and upload files""" + client: Together = ctx.obj + asyncio.run(_create_volume(client, name, source)) + + +@volumes.command("update") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@click.option("--source", required=True, help="New source directory path") +@handle_api_errors("Volumes") +def volumes_update( + ctx: click.Context, + name: str, + source: str, +) -> None: + """Update a volume and re-upload files""" + client: Together = ctx.obj + asyncio.run(_update_volume(client, name, source)) + + +@volumes.command("delete") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@handle_api_errors("Volumes") +def volumes_delete( + ctx: click.Context, + name: str, +) -> None: + """Delete a volume""" + client: Together = ctx.obj + + try: + client.beta.jig.volumes.delete(name) + click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + click.echo(f"\N{CROSS MARK} Volume '{name}' not found") + return + raise + + +@volumes.command("describe") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@handle_api_errors("Volumes") +def volumes_describe( + ctx: click.Context, + name: str, +) -> None: + """Describe a volume""" + client: Together = ctx.obj + + try: + response = client.beta.jig.volumes.with_raw_response.retrieve(name) + click.echo(json.dumps(response.json(), indent=2)) + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + click.echo(f"\N{CROSS MARK} Volume '{name}' not found") + return + raise + + +@volumes.command("list") +@click.pass_context +@handle_api_errors("Volumes") +def volumes_list(ctx: click.Context) -> None: + """List all volumes""" + client: Together = ctx.obj + response = client.beta.jig.volumes.with_raw_response.list() + click.echo(json.dumps(response.json(), indent=2)) diff --git a/src/together/lib/cli/api/beta/jig/volumes.py b/src/together/lib/cli/api/beta/jig/volumes.py deleted file mode 100644 index bcc6301c..00000000 --- a/src/together/lib/cli/api/beta/jig/volumes.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Volume management CLI commands for jig.""" - -from __future__ import annotations - -import json -import asyncio -from pathlib import Path - -import click - -from together import Together -from 
together._exceptions import APIStatusError -from together.lib.cli.api._utils import handle_api_errors -from together.lib.cli.api.beta.jig._uploader import Uploader - - -@click.group() -@click.pass_context -def volumes(ctx: click.Context) -> None: - """Manage volumes""" - pass - - -# --- File upload --- - - -async def _create_volume(client: Together, name: str, source: str) -> None: - """Create a volume and upload files""" - source_path = Path(source) - if not source_path.exists(): - raise ValueError(f"Source path does not exist: {source}") - if not source_path.is_dir(): - raise ValueError(f"Source path must be a directory: {source}") - - source_prefix = f"{name}/{source_path.name}" - - click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") - try: - volume_response = client.beta.jig.volumes.create( - name=name, - type="readOnly", - content={"type": "files", "source_prefix": source_prefix}, - ) - click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") - except Exception as e: - raise RuntimeError(f"Failed to create volume: {e}") from e - - try: - await Uploader(client).upload_files(source_path, volume_name=name) - except Exception as e: - click.echo(f"\N{CROSS MARK} Upload failed: {e}") - click.echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'") - try: - client.beta.jig.volumes.delete(name) - except Exception as cleanup_error: - click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") - raise - - -async def _update_volume(client: Together, name: str, source: str) -> None: - """Update a volume and re-upload files""" - source_path = Path(source) - if not source_path.exists(): - raise ValueError(f"Source path does not exist: {source}") - if not source_path.is_dir(): - raise ValueError(f"Source path must be a directory: {source}") - - try: - client.beta.jig.volumes.retrieve(name) - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - raise ValueError(f"Volume '{name}' does not exist") from e - raise - - source_prefix = f"{name}/{source_path.name}" - - click.echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") - await Uploader(client).upload_files(source_path, volume_name=name) - - click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") - client.beta.jig.volumes.update( - name, - content={"type": "files", "source_prefix": source_prefix}, - ) - click.echo("\N{CHECK MARK} Volume updated successfully") - - -# --- CLI Commands --- - - -@volumes.command("create") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@click.option("--source", required=True, help="Source directory path") -@handle_api_errors("Volumes") -def volumes_create( - ctx: click.Context, - name: str, - source: str, -) -> None: - """Create a volume and upload files""" - client: Together = ctx.obj - asyncio.run(_create_volume(client, name, source)) - - -@volumes.command("update") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@click.option("--source", required=True, help="New source directory path") -@handle_api_errors("Volumes") -def volumes_update( - ctx: click.Context, - name: str, - source: str, -) -> None: - """Update a volume and re-upload files""" - client: Together = ctx.obj - asyncio.run(_update_volume(client, name, source)) - - -@volumes.command("delete") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") -def volumes_delete( - ctx: click.Context, - 
name: str,
-) -> None:
-    """Delete a volume"""
-    client: Together = ctx.obj
-
-    try:
-        client.beta.jig.volumes.delete(name)
-        click.echo(f"\N{CHECK MARK} Deleted volume '{name}'")
-    except APIStatusError as e:
-        if hasattr(e, "status_code") and e.status_code == 404:
-            click.echo(f"\N{CROSS MARK} Volume '{name}' not found")
-            return
-        raise
-
-
-@volumes.command("describe")
-@click.pass_context
-@click.option("--name", required=True, help="Volume name")
-@handle_api_errors("Volumes")
-def volumes_describe(
-    ctx: click.Context,
-    name: str,
-) -> None:
-    """Describe a volume"""
-    client: Together = ctx.obj
-
-    try:
-        response = client.beta.jig.volumes.with_raw_response.retrieve(name)
-        click.echo(json.dumps(response.json(), indent=2))
-    except APIStatusError as e:
-        if hasattr(e, "status_code") and e.status_code == 404:
-            click.echo(f"\N{CROSS MARK} Volume '{name}' not found")
-            return
-        raise
-
-
-@volumes.command("list")
-@click.pass_context
-@handle_api_errors("Volumes")
-def volumes_list(ctx: click.Context) -> None:
-    """List all volumes"""
-    client: Together = ctx.obj
-    response = client.beta.jig.volumes.with_raw_response.list()
-    click.echo(json.dumps(response.json(), indent=2))

From 39f885aecd3b67e4fff5af5b2ad61413cbff983f Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 17:31:14 -0500
Subject: [PATCH 04/51] simplify status format utils slightly

---
 src/together/lib/cli/api/beta/jig/__init__.py | 19 +++++-----
 src/together/lib/cli/api/beta/jig/_utils.py   | 39 +++++++------------
 2 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/src/together/lib/cli/api/beta/jig/__init__.py b/src/together/lib/cli/api/beta/jig/__init__.py
index 02dd8065..b9ca42ca 100644
--- a/src/together/lib/cli/api/beta/jig/__init__.py
+++ b/src/together/lib/cli/api/beta/jig/__init__.py
@@ -1,24 +1,23 @@
 """Jig CLI - deployment tool for Together AI."""
 
 import click
-
 from together.lib.cli.api.beta.jig.jig import (
-    init,
-    logs,
-    push,
     build,
     deploy,
-    status,
-    submit,
     destroy,
-    endpoint,
     dockerfile,
+    endpoint,
+    init,
     job_status,
-    queue_status,
     list_deployments,
+    logs,
+    push,
+    queue_status,
+    secrets,
+    status,
+    submit,
+    volumes,
 )
-from together.lib.cli.api.beta.jig.secrets import secrets
-from together.lib.cli.api.beta.jig.volumes import volumes
 
 
 @click.group()
diff --git a/src/together/lib/cli/api/beta/jig/_utils.py b/src/together/lib/cli/api/beta/jig/_utils.py
index 60a13d61..cf991691 100644
--- a/src/together/lib/cli/api/beta/jig/_utils.py
+++ b/src/together/lib/cli/api/beta/jig/_utils.py
@@ -1,31 +1,25 @@
 """Utility functions for jig CLI commands."""
 
 from __future__ import annotations
-
+from itertools import groupby
 from datetime import datetime
 
 from together.types.beta.deployment import Deployment
 
 
-def _format_timestamp(timestamp_str: str | None) -> str:
+def _format_timestamp(ts: str | None) -> str:
     """Format ISO timestamp for display"""
-    if not timestamp_str:
-        return "-"
     try:
-        ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
-        return ts.strftime("%Y-%m-%d %H:%M:%S")
-    except (ValueError, TypeError):
-        return timestamp_str or "-"
+        return datetime.fromisoformat(ts.replace("Z", "+00:00")).strftime("%Y-%m-%d %H:%M:%S")
+    except (ValueError, TypeError, AttributeError):
+        return ts or "-"
 
 
 def _image_tag(image: str | None) -> str:
     if image is None:
         return "unknown"
-    tag = image.rsplit(":", 1)[-1] if ":" in image else image
-    if "@sha256:" in image:
-        tag = f"sha256:{tag[:8]}"
-
-    return tag
+    tag = image.rsplit(":", 1)[-1]
+    return f"sha256:{tag[:8]}" if "sha256:" in image else tag
 
 
 def format_deployment_status(d: Deployment) -> str:
@@ -86,22 +80,21 @@ def format_deployment_status(d: Deployment) -> str:
     status += config_status
 
     if d.replica_events:
+        for replica in d.replica_events.values():
+            replica.image = replica.image or "-"
+        sorted_replicas = sorted(d.replica_events.items(), key=lambda item: item[1].image, reverse=True)
         events_status = "\nReplica Events:\n"
-        images = set(map(lambda x: x.image or "-", d.replica_events.values()))
-        for image in reversed(sorted(images)):
-            events = filter(lambda x: ((x[1].image or "-") == image), d.replica_events.items())
+        for image, group in groupby(sorted_replicas, key=lambda item: item[1].image):
             events_status += f"{_image_tag(image)}:\n"
-            for replica_id, event in events:
+            for replica_id, replica in group:
                 events_status += f"  {replica_id}: "
-
-                if event.volume_preload_status and not event.volume_preload_completed_at:
+                if replica.volume_preload_status and not replica.volume_preload_completed_at:
                     events_status += f"Volume Preloading"
                 else:
-                    events_status += f"{event.replica_status}"
-                    if event.replica_status == "Running":
-                        events_status += f", ready since {_format_timestamp(event.replica_ready_since)}"
+                    events_status += f"{replica.replica_status}"
+                    if replica.replica_status == "Running":
+                        events_status += f", ready since {_format_timestamp(replica.replica_ready_since)}"
                 events_status += "\n"
 
         status += events_status
-
     return status

From a3745121786b25b399e868015a8c187a3340f450 Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 17:34:22 -0500
Subject: [PATCH 05/51] inline utils into jig

---
 src/together/lib/cli/api/beta/jig/_utils.py | 100 --------------------
 src/together/lib/cli/api/beta/jig/jig.py    |  99 +++++++++++++++++--
 2 files changed, 98 insertions(+), 101 deletions(-)
 delete mode 100644 src/together/lib/cli/api/beta/jig/_utils.py

diff --git a/src/together/lib/cli/api/beta/jig/_utils.py b/src/together/lib/cli/api/beta/jig/_utils.py
deleted file mode 100644
index cf991691..00000000
--- a/src/together/lib/cli/api/beta/jig/_utils.py
+++ /dev/null
@@ -1,100 +0,0 @@
-"""Utility functions for jig CLI commands."""
-
-from __future__ import annotations
-from itertools import groupby
-from datetime import datetime
-
-from together.types.beta.deployment import Deployment
-
-
-def _format_timestamp(ts: str | None) -> str:
-    """Format ISO timestamp for display"""
-    try:
-        return datetime.fromisoformat(ts.replace("Z", "+00:00")).strftime("%Y-%m-%d %H:%M:%S")
-    except (ValueError, TypeError, AttributeError):
-        return ts or "-"
-
-
-def _image_tag(image: str | None) -> str:
-    if image is None:
-        return "unknown"
-    tag = image.rsplit(":", 1)[-1]
-    return f"sha256:{tag[:8]}" if "sha256:" in image else tag
-
-
-def format_deployment_status(d: Deployment) -> str:
-    """Format d status for CLI display"""
-    status = (
-        "App:\n"
-        f"  {'Name':<8}: {d.name} ┃ ID: {d.id}\n"
-        f"  {'Image':<8}: {d.image}\n"
-        f"  {'Status':<8}: {d.status}\n"
-        f"  Created : {_format_timestamp(d.created_at)}"
-        f" ┃ Updated : {_format_timestamp(d.updated_at)}\n"
-    )
-
-    if d.autoscaling:
-        autoscaling_status = (
-            f"\n  Autoscaling: {d.autoscaling.get('metric', 'N/A')} {d.autoscaling.get('target', 'N/A')}(target)\n"
-        )
-        status += autoscaling_status
-
-    replica_status = (
-        "\n"
-        f"  Replicas:\n"
-        f"    {'Min/Max':<16}: {d.min_replicas}/{d.max_replicas}\n"
-        f"    {'Ready/Desired':<16}: {d.ready_replicas}/{d.desired_replicas}\n"
-    )
-
-    status += replica_status
-
-    config_status = (
-        f"\nConfiguration:\n"
-        f"  Port: {d.port}\n"
-        f"  Command: {d.command}\n"
-        f"  Args: {d.args}\n"
-        f"  Health Check Path: {d.health_check_path}\n"
-        f"  Resources: {d.cpu} core CPU ┃ {d.memory}GB Memory ┃ {d.storage}GB Storage \n"
-    )
-
-    if d.gpu_count and d.gpu_type:
-        config_status += f"  GPU: {d.gpu_count}x {d.gpu_type}\n"
-
-    if d.volumes:
-        config_status += f"\n  Volumes:\n    {'NAME':<28} MOUNT_PATH\n"
-        for vol in d.volumes:
-            config_status += f"    {vol.name:<28} {vol.mount_path}\n"
-
-    if d.environment_variables:
-        secrets = [env for env in d.environment_variables if env.value_from_secret]
-        env_vars = [env for env in d.environment_variables if not env.value_from_secret]
-
-        if secrets:
-            config_status += f"\n  Secrets: {[secret.name for secret in secrets]}\n"
-
-        if env_vars:
-            config_status += f"\n  Environment Variables:\n    {'NAME':<40} VALUE\n"
-            for env in env_vars:
-                config_status += f"    {env.name:<40} {env.value}\n"
-
-    status += config_status
-
-    if d.replica_events:
-        for replica in d.replica_events.values():
-            replica.image = replica.image or "-"
-        sorted_replicas = sorted(d.replica_events.items(), key=lambda item: item[1].image, reverse=True)
-        events_status = "\nReplica Events:\n"
-        for image, group in groupby(sorted_replicas, key=lambda item: item[1].image):
-            events_status += f"{_image_tag(image)}:\n"
-            for replica_id, replica in group:
-                events_status += f"  {replica_id}: "
-                if replica.volume_preload_status and not replica.volume_preload_completed_at:
-                    events_status += f"Volume Preloading"
-                else:
-                    events_status += f"{replica.replica_status}"
-                    if replica.replica_status == "Running":
-                        events_status += f", ready since {_format_timestamp(replica.replica_ready_since)}"
-                events_status += "\n"
-
-        status += events_status
-    return status
diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py
index e3a5de85..d37dbfe5 100644
--- a/src/together/lib/cli/api/beta/jig/jig.py
+++ b/src/together/lib/cli/api/beta/jig/jig.py
@@ -9,7 +9,9 @@
 import subprocess
 import time
 from dataclasses import asdict
+from datetime import datetime
 from enum import Enum
+from itertools import groupby
 from pathlib import Path
 from typing import Any, Callable, Optional
 from urllib.parse import urlparse
@@ -21,12 +23,107 @@
 from together.lib.cli.api._utils import handle_api_errors
 from together.lib.cli.api.beta.jig._config import DEBUG, WARMUP_DEST, WARMUP_ENV_NAME, Config, State
 from together.lib.cli.api.beta.jig._uploader import Uploader
-from together.lib.cli.api.beta.jig._utils import format_deployment_status
 from together.types.beta.deployment import Deployment
 from together.types.beta.jig.queue_submit_response import QueueSubmitResponse
 
 # Managed dockerfile marker - if this is the first line, jig will regenerate the file
 DOCKERFILE_MANAGED_MARKER = "# MANAGED BY JIG - Remove this line to prevent jig from overwriting this file"
 
+
+# --- Status prettyprint utils ---
+
+
+def _format_timestamp(ts: str | None) -> str:
+    """Format ISO timestamp for display"""
+    try:
+        return datetime.fromisoformat(ts.replace("Z", "+00:00")).strftime("%Y-%m-%d %H:%M:%S")
+    except (ValueError, TypeError, AttributeError):
+        return ts or "-"
+
+
+def _image_tag(image: str | None) -> str:
+    if image is None:
+        return "unknown"
+    tag = image.rsplit(":", 1)[-1]
+    return f"sha256:{tag[:8]}" if "sha256:" in image else tag
+
+
+def format_deployment_status(d: Deployment) -> str:
+    """Format d status for CLI display"""
+    status = (
+        "App:\n"
+        f"  {'Name':<8}: {d.name} ┃ ID: {d.id}\n"
+        f"  {'Image':<8}: {d.image}\n"
+        f"  {'Status':<8}: {d.status}\n"
+        f"  Created : {_format_timestamp(d.created_at)}"
+        f" ┃ Updated : {_format_timestamp(d.updated_at)}\n"
+    )
+
+    if d.autoscaling:
+        autoscaling_status = (
+            f"\n  Autoscaling: {d.autoscaling.get('metric', 'N/A')} {d.autoscaling.get('target', 'N/A')}(target)\n"
+        )
+        status += autoscaling_status
+
+    replica_status = (
+        "\n"
+        f"  Replicas:\n"
+        f"    {'Min/Max':<16}: 
{d.min_replicas}/{d.max_replicas}\n" + f" {'Ready/Desired':<16}: {d.ready_replicas}/{d.desired_replicas}\n" + ) + + status += replica_status + + config_status = ( + f"\nConfiguration:\n" + f" Port: {d.port}\n" + f" Command: {d.command}\n" + f" Args: {d.args}\n" + f" Health Check Path: {d.health_check_path}\n" + f" Resources: {d.cpu} core CPU ┃ {d.memory}GB Memory ┃ {d.storage}GB Storage \n" + ) + + if d.gpu_count and d.gpu_type: + config_status += f" GPU: {d.gpu_count}x {d.gpu_type}\n" + + if d.volumes: + config_status += f"\n Volumes:\n {'NAME':<28} MOUNT_PATH\n" + for vol in d.volumes: + config_status += f" {vol.name:<28} {vol.mount_path}\n" + + if d.environment_variables: + secrets = [env for env in d.environment_variables if env.value_from_secret] + env_vars = [env for env in d.environment_variables if not env.value_from_secret] + + if secrets: + config_status += f"\n Secrets: {[secret.name for secret in secrets]}\n" + + if env_vars: + config_status += f"\n Environment Variables:\n {'NAME':<40} VALUE\n" + for env in env_vars: + config_status += f" {env.name:<40} {env.value}\n" + + status += config_status + + if d.replica_events: + for replica in d.replica_events.values(): + replica.image = replica.image or "-" + sorted_replicas = sorted(d.replica_events.items(), key=lambda item: item[1].image, reverse=True) + events_status = "\nReplica Events:\n" + for image, group in groupby(sorted_replicas, key=lambda item: item[1].image): + events_status += f"{_image_tag(image)}:\n" + for replica_id, replica in group: + events_status += f" {replica_id}: " + if replica.volume_preload_status and not replica.volume_preload_completed_at: + events_status += f"Volume Preloading" + else: + events_status += f"{replica.replica_status}" + if replica.replica_status == "Running": + events_status += f", ready since {_format_timestamp(replica.replica_ready_since)}" + events_status += "\n" + + status += events_status + return status + + # --- Helper Functions --- From 6d891331a4ccb7d77e8330b1ce634005c4c6d255 Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 17:44:29 -0500 Subject: [PATCH 06/51] inline config into jig --- src/together/lib/cli/api/beta/jig/_config.py | 293 ----------------- src/together/lib/cli/api/beta/jig/jig.py | 316 ++++++++++++++++++- 2 files changed, 300 insertions(+), 309 deletions(-) delete mode 100644 src/together/lib/cli/api/beta/jig/_config.py diff --git a/src/together/lib/cli/api/beta/jig/_config.py b/src/together/lib/cli/api/beta/jig/_config.py deleted file mode 100644 index 21f9d858..00000000 --- a/src/together/lib/cli/api/beta/jig/_config.py +++ /dev/null @@ -1,293 +0,0 @@ -"""Configuration and state management for jig CLI.""" - -from __future__ import annotations - -import os -import sys -import json -import typing -from typing import TYPE_CHECKING, Any, Union, Optional -from pathlib import Path -from dataclasses import field, asdict, dataclass, is_dataclass - -import click - -if TYPE_CHECKING: - import tomli as tomllib -else: - try: - import tomllib - except ImportError: - import tomli as tomllib - -# --- Environment Configuration --- - -DEBUG = os.getenv("TOGETHER_DEBUG", "").strip()[:1] in ("y", "1", "t") - -UPLOAD_CONCURRENCY_LIMIT = int(os.getenv("TOGETHER_UPLOAD_CONCURRENCY", "15")) -MULTIPART_CHUNK_SIZE_MB = int(os.getenv("TOGETHER_MULTIPART_CHUNK_SIZE_MB", "20")) -MULTIPART_THRESHOLD_MB = int(os.getenv("TOGETHER_MULTIPART_THRESHOLD_MB", "100")) -MAX_UPLOAD_RETRIES = 3 - -# Warmup configuration (for torch compile cache) -WARMUP_ENV_NAME = 
os.getenv("WARMUP_ENV_NAME", "TORCHINDUCTOR_CACHE_DIR") -WARMUP_DEST = os.getenv("WARMUP_DEST", "torch_cache") - - -# --- Configuration Dataclasses --- - - -@dataclass -class ImageConfig: - """Container image configuration from pyproject.toml""" - - python_version: str = "3.11" - system_packages: list[str] = field(default_factory=list[str]) - environment: dict[str, str] = field(default_factory=dict[str, str]) - run: list[str] = field(default_factory=list[str]) - cmd: str = "python app.py" - copy: list[str] = field(default_factory=list[str]) - auto_include_git: bool = False - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> ImageConfig: - return cls(**{k: v for k, v in data.items() if k in cls.__annotations__}) - - -@dataclass -class VolumeMount: - """Volume mount configuration""" - - name: str - mount_path: str - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> VolumeMount: - try: - return cls(**{k: v for k, v in data.items() if k in cls.__annotations__}) - except Exception as e: - raise click.UsageError(f"Invalid volume mount {data}: {e}") from None - - -@dataclass -class DeployConfig: - """Deployment configuration""" - - description: str = "" - gpu_type: str = "h100-80gb" - gpu_count: int = 1 - cpu: int | float = 1 - memory: int | float = 8 - storage: int = 100 - min_replicas: int = 1 - max_replicas: int = 1 - port: int = 8000 - environment_variables: dict[str, str] = field(default_factory=dict[str, str]) - command: Optional[list[str]] = None - autoscaling: dict[str, str] = field(default_factory=dict[str, str]) - health_check_path: str = "/health" - termination_grace_period_seconds: int = 300 - volume_mounts: list[VolumeMount] = field(default_factory=list[VolumeMount]) - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> DeployConfig: - deploy_config = {k: v for k, v in data.items() if k in cls.__annotations__} - if isinstance((mounts := deploy_config.get("volume_mounts")), list): - deploy_config["volume_mounts"] = [VolumeMount.from_dict(vm) for vm in mounts] # pyright: ignore - return cls(**deploy_config) - - -def validate(value: Any, value_type: type, path: str = "") -> str | None: - origin = typing.get_origin(value_type) - args = typing.get_args(value_type) - - if origin is list: - if not isinstance(value, list): - return f"{path}: expected list, got {type(value).__name__}" - for i, v in enumerate(value): # pyright: ignore - if err := validate(v, args[0], f"{path}[{i}]"): - return err - return None - - if origin is dict: - if not isinstance(value, dict): - return f"{path}: expected dict, got {type(value).__name__}" - for k, v in value.items(): # pyright: ignore - if err := validate(k, args[0], f"{path}.key({k!r})"): - return err - if err := validate(v, args[1], f"{path}[{k!r}]"): - return err - return None - - if origin is Union: - if value is None or any(validate(value, a, path) is None for a in args if a is not type(None)): - return None - return f"{path}: expected {value_type}, got {type(value).__name__}" - - if is_dataclass(value_type): - if not isinstance(value, value_type): - return f"{path}: expected {value_type.__name__}, got {type(value).__name__}" - for k, t in typing.get_type_hints(value_type, globalns=globals()).items(): - if err := validate(getattr(value, k), t, f"{path}.{k}" if path else k): - return err - return None - - if not isinstance(value, value_type): - return f"{path}: expected {type(value).__name__}, got {value!r}" - return None - - -@dataclass -class Config: - """Main configuration from jig.toml or pyproject.toml""" - - 
model_name: str = "" - dockerfile: str = "Dockerfile" - image: ImageConfig = field(default_factory=ImageConfig) - deploy: DeployConfig = field(default_factory=DeployConfig) - _path: Path = field(default_factory=lambda: Path("pyproject.toml")) - _unique_name_tip: str = "Update project.name in pyproject.toml" - - def __post_init__(self) -> None: - if err := validate(self, type(self)): - raise click.UsageError(f"Invalid {self._path}: {err}") - - @classmethod - def find(cls, config_path: Optional[str] = None, init: bool = False) -> Config: - """Find specified config_path, pyproject.toml, or jig.toml""" - if config_path: - found_path = Path(config_path) - if not found_path.exists(): - click.echo(f"ERROR: Configuration file not found: {config_path}", err=True) - sys.exit(1) - return cls.load(tomllib.load(found_path.open("rb")), found_path) - - if (jigfile := Path("jig.toml")).exists(): - return cls.load(tomllib.load(jigfile.open("rb")), jigfile) - - if (pyproject_path := Path("pyproject.toml")).exists(): - data = tomllib.load(pyproject_path.open("rb")) - if "tool" in data and "jig" in data["tool"]: - return cls.load(data, pyproject_path) - - if init: - return cls() - click.echo( - "ERROR: No pyproject.toml or jig.toml found, use --config to specify a config path.", - err=True, - ) - sys.exit(1) - - @classmethod - def load(cls, data: dict[str, Any], path: Path) -> Config: - """Load configuration from parsed TOML data""" - # figure out config location and "Deployment name must be unique. Tip: update ..." message - is_pyproject = path.name.endswith("pyproject.toml") - if is_pyproject: - jig_config = data.get("tool", {}).get("jig", {}) - if name := jig_config.get("name"): - tip = "update `name` in your pyproject.toml" - elif name := data.get("project", {}).get("name", ""): - tip = "update `project.name` in your pyproject.toml" - else: - name = path.resolve().parent.name - tip = "rename your folder or add `project.name` to your pyproject.toml" - click.echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") - else: - jig_config = data - if name := jig_config.get("name"): - tip = "update `name` in {path}" - else: - name = path.resolve().parent.name - tip = f"rename your folder or add `name` to {path}" - click.echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") - - if autoscaling := jig_config.get("autoscaling", {}): - autoscaling["model"] = name - jig_config["deploy"]["autoscaling"] = autoscaling - - # Support volume_mounts at jig level (merge into deploy config) - jig_config["deploy"]["volume_mounts"] = jig_config.get("volume_mounts", []) - - return cls( - image=ImageConfig.from_dict(jig_config.get("image", {})), - deploy=DeployConfig.from_dict(jig_config.get("deploy", {})), - dockerfile=jig_config.get("dockerfile", "Dockerfile"), - model_name=name, - _path=path, - _unique_name_tip=tip, - ) - - -# --- State Management --- - - -@dataclass -class State: - """Persistent state stored in .jig.json""" - - _config_dir: Path - _project_name: str - registry_base_path: str = "" - secrets: dict[str, str] = field(default_factory=dict[str, str]) - volumes: dict[str, str] = field(default_factory=dict[str, str]) - - @classmethod - def from_dict(cls, config_dir: Path, project_name: str, **data: Any) -> State: - filtered = {k: v for k, v in data.items() if k in cls.__annotations__ and not k.startswith("_")} - return cls(_config_dir=config_dir, _project_name=project_name, **filtered) - - @classmethod - def load(cls, config_dir: Path, project_name: str) -> State: - """Load state for a specific 
project from .jig.json. - - The state file structure is: - { - "project-name-1": { - "registry_base_path": "...", - "secrets": {...}, - "volumes": {...} - }, - "project-name-2": {...} - } - - """ - path = config_dir / ".jig.json" - try: - with open(path) as f: - all_data = json.load(f) - - # Check if this is the new nested structure (project_name as key) - if project_name in all_data and isinstance(all_data[project_name], dict): - # New structure: extract project-specific state - project_data = all_data[project_name] - return cls.from_dict(config_dir, project_name, **project_data) - # Secrets or volumes exist, but not yet migrated (don't care about registry base path) - if "secrets" in all_data or "volumes" in all_data: - return cls.from_dict(config_dir, project_name, **all_data) - # File exists but this project isn't in it yet - return cls(_config_dir=config_dir, _project_name=project_name) - except FileNotFoundError: - return cls(_config_dir=config_dir, _project_name=project_name) - - def save(self) -> None: - """Save state for this project to .jig.json. - - Preserves other projects' state in the same file. - """ - path = self._config_dir / ".jig.json" - - # Load existing file to preserve other projects - try: - with open(path) as f: - all_data = json.load(f) - except FileNotFoundError: - all_data = {} - - # Update this project's state - project_data = {k: v for k, v in asdict(self).items() if not k.startswith("_")} - all_data[self._project_name] = project_data - - # Save back to file - with open(path, "w") as f: - json.dump(all_data, f, indent=2) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index d37dbfe5..7d9618ba 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -4,16 +4,19 @@ import asyncio import json +import os import shlex import shutil import subprocess +import sys import time -from dataclasses import asdict +import typing +from dataclasses import asdict, dataclass, field, is_dataclass from datetime import datetime from enum import Enum from itertools import groupby from pathlib import Path -from typing import Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional, Union from urllib.parse import urlparse import click @@ -26,11 +29,294 @@ from together.types.beta.deployment import Deployment from together.types.beta.jig.queue_submit_response import QueueSubmitResponse +if TYPE_CHECKING: + import tomli as tomllib +else: + try: + import tomllib + except ImportError: + import tomli as tomllib + + # Managed dockerfile marker - if this is the first line, jig will regenerate the file DOCKERFILE_MANAGED_MARKER = "# MANAGED BY JIG - Remove this line to prevent jig from overwriting this file" -# --- Status prettyprint utils --- +# == Config and state == +# --- Environment Configuration --- + +DEBUG = os.getenv("TOGETHER_DEBUG", "").strip()[:1] in ("y", "1", "t") + +UPLOAD_CONCURRENCY_LIMIT = int(os.getenv("TOGETHER_UPLOAD_CONCURRENCY", "15")) +MULTIPART_CHUNK_SIZE_MB = int(os.getenv("TOGETHER_MULTIPART_CHUNK_SIZE_MB", "20")) +MULTIPART_THRESHOLD_MB = int(os.getenv("TOGETHER_MULTIPART_THRESHOLD_MB", "100")) +MAX_UPLOAD_RETRIES = 3 + +# Warmup configuration (for torch compile cache) +WARMUP_ENV_NAME = os.getenv("WARMUP_ENV_NAME", "TORCHINDUCTOR_CACHE_DIR") +WARMUP_DEST = os.getenv("WARMUP_DEST", "torch_cache") + + +# --- Configuration Dataclasses --- + + +@dataclass +class ImageConfig: + """Container image configuration from pyproject.toml""" + + 
python_version: str = "3.11"
+    system_packages: list[str] = field(default_factory=list[str])
+    environment: dict[str, str] = field(default_factory=dict[str, str])
+    run: list[str] = field(default_factory=list[str])
+    cmd: str = "python app.py"
+    copy: list[str] = field(default_factory=list[str])
+    auto_include_git: bool = False
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> ImageConfig:
+        return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
+
+
+@dataclass
+class VolumeMount:
+    """Volume mount configuration"""
+
+    name: str
+    mount_path: str
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> VolumeMount:
+        try:
+            return cls(**{k: v for k, v in data.items() if k in cls.__annotations__})
+        except Exception as e:
+            raise click.UsageError(f"Invalid volume mount {data}: {e}") from None
+
+
+@dataclass
+class DeployConfig:
+    """Deployment configuration"""
+
+    description: str = ""
+    gpu_type: str = "h100-80gb"
+    gpu_count: int = 1
+    cpu: int | float = 1
+    memory: int | float = 8
+    storage: int = 100
+    min_replicas: int = 1
+    max_replicas: int = 1
+    port: int = 8000
+    environment_variables: dict[str, str] = field(default_factory=dict[str, str])
+    command: Optional[list[str]] = None
+    autoscaling: dict[str, str] = field(default_factory=dict[str, str])
+    health_check_path: str = "/health"
+    termination_grace_period_seconds: int = 300
+    volume_mounts: list[VolumeMount] = field(default_factory=list[VolumeMount])
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> DeployConfig:
+        deploy_config = {k: v for k, v in data.items() if k in cls.__annotations__}
+        if isinstance((mounts := deploy_config.get("volume_mounts")), list):
+            deploy_config["volume_mounts"] = [VolumeMount.from_dict(vm) for vm in mounts]  # pyright: ignore
+        return cls(**deploy_config)
+
+
+def validate(value: Any, value_type: type, path: str = "") -> str | None:
+    origin = typing.get_origin(value_type)
+    args = typing.get_args(value_type)
+
+    if origin is list:
+        if not isinstance(value, list):
+            return f"{path}: expected list, got {type(value).__name__}"
+        for i, v in enumerate(value):  # pyright: ignore
+            if err := validate(v, args[0], f"{path}[{i}]"):
+                return err
+        return None
+
+    if origin is dict:
+        if not isinstance(value, dict):
+            return f"{path}: expected dict, got {type(value).__name__}"
+        for k, v in value.items():  # pyright: ignore
+            if err := validate(k, args[0], f"{path}.key({k!r})"):
+                return err
+            if err := validate(v, args[1], f"{path}[{k!r}]"):
+                return err
+        return None
+
+    if origin is Union:
+        if value is None or any(validate(value, a, path) is None for a in args if a is not type(None)):
+            return None
+        return f"{path}: expected {value_type}, got {type(value).__name__}"
+
+    if is_dataclass(value_type):
+        if not isinstance(value, value_type):
+            return f"{path}: expected {value_type.__name__}, got {type(value).__name__}"
+        for k, t in typing.get_type_hints(value_type, globalns=globals()).items():
+            if err := validate(getattr(value, k), t, f"{path}.{k}" if path else k):
+                return err
+        return None
+
+    if not isinstance(value, value_type):
+        return f"{path}: expected {value_type}, got {value!r}"
+    return None
+
+
+@dataclass
+class Config:
+    """Main configuration from jig.toml or pyproject.toml"""
+
+    model_name: str = ""
+    dockerfile: str = "Dockerfile"
+    image: ImageConfig = field(default_factory=ImageConfig)
+    deploy: DeployConfig = field(default_factory=DeployConfig)
+    _path: Path = field(default_factory=lambda: Path("pyproject.toml"))
+    _unique_name_tip: str = "Update project.name in pyproject.toml"
+
+    def __post_init__(self) -> None:
+        if err := validate(self, type(self)):
+            raise click.UsageError(f"Invalid {self._path}: {err}")
+
+    @classmethod
+    def find(cls, config_path: Optional[str] = None, init: bool = False) -> Config:
+        """Find specified config_path, jig.toml, or pyproject.toml"""
+        if config_path:
+            found_path = Path(config_path)
+            if not found_path.exists():
+                click.echo(f"ERROR: Configuration file not found: {config_path}", err=True)
+                sys.exit(1)
+            return cls.load(tomllib.load(found_path.open("rb")), found_path)
+
+        if (jigfile := Path("jig.toml")).exists():
+            return cls.load(tomllib.load(jigfile.open("rb")), jigfile)
+
+        if (pyproject_path := Path("pyproject.toml")).exists():
+            data = tomllib.load(pyproject_path.open("rb"))
+            if "tool" in data and "jig" in data["tool"]:
+                return cls.load(data, pyproject_path)
+
+        if init:
+            return cls()
+        click.echo(
+            "ERROR: No pyproject.toml or jig.toml found, use --config to specify a config path.",
+            err=True,
+        )
+        sys.exit(1)
+
+    @classmethod
+    def load(cls, data: dict[str, Any], path: Path) -> Config:
+        """Load configuration from parsed TOML data"""
+        # figure out config location and "Deployment name must be unique. Tip: update ..." message
+        is_pyproject = path.name.endswith("pyproject.toml")
+        if is_pyproject:
+            jig_config = data.get("tool", {}).get("jig", {})
+            if name := jig_config.get("name"):
+                tip = "update `name` in your pyproject.toml"
+            elif name := data.get("project", {}).get("name", ""):
+                tip = "update `project.name` in your pyproject.toml"
+            else:
+                name = path.resolve().parent.name
+                tip = "rename your folder or add `project.name` to your pyproject.toml"
+                click.echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}")
+        else:
+            jig_config = data
+            if name := jig_config.get("name"):
+                tip = f"update `name` in {path}"
+            else:
+                name = path.resolve().parent.name
+                tip = f"rename your folder or add `name` to {path}"
+                click.echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}")
+
+        if autoscaling := jig_config.get("autoscaling", {}):
+            autoscaling["model"] = name
+            jig_config.setdefault("deploy", {})["autoscaling"] = autoscaling
+
+        # Support volume_mounts at jig level (merge into deploy config)
+        if volume_mounts := jig_config.get("volume_mounts", []):
+            jig_config.setdefault("deploy", {})["volume_mounts"] = volume_mounts
+
+        return cls(
+            image=ImageConfig.from_dict(jig_config.get("image", {})),
+            deploy=DeployConfig.from_dict(jig_config.get("deploy", {})),
+            dockerfile=jig_config.get("dockerfile", "Dockerfile"),
+            model_name=name,
+            _path=path,
+            _unique_name_tip=tip,
+        )
+
+
+# --- State Management ---
+
+
+@dataclass
+class State:
+    """Persistent state stored in .jig.json"""
+
+    _config_dir: Path
+    _project_name: str
+    registry_base_path: str = ""
+    secrets: dict[str, str] = field(default_factory=dict[str, str])
+    volumes: dict[str, str] = field(default_factory=dict[str, str])
+
+    @classmethod
+    def from_dict(cls, config_dir: Path, project_name: str, **data: Any) -> State:
+        filtered = {k: v for k, v in data.items() if k in cls.__annotations__ and not k.startswith("_")}
+        return cls(_config_dir=config_dir, _project_name=project_name, **filtered)
+
+    @classmethod
+    def load(cls, config_dir: Path, project_name: str) -> State:
+        """Load state for a specific project from .jig.json.
+ + The state file structure is: + { + "project-name-1": { + "registry_base_path": "...", + "secrets": {...}, + "volumes": {...} + }, + "project-name-2": {...} + } + + """ + path = config_dir / ".jig.json" + try: + with open(path) as f: + all_data = json.load(f) + + # Check if this is the new nested structure (project_name as key) + if project_name in all_data and isinstance(all_data[project_name], dict): + # New structure: extract project-specific state + project_data = all_data[project_name] + return cls.from_dict(config_dir, project_name, **project_data) + # Secrets or volumes exist, but not yet migrated (don't care about registry base path) + if "secrets" in all_data or "volumes" in all_data: + return cls.from_dict(config_dir, project_name, **all_data) + # File exists but this project isn't in it yet + return cls(_config_dir=config_dir, _project_name=project_name) + except FileNotFoundError: + return cls(_config_dir=config_dir, _project_name=project_name) + + def save(self) -> None: + """Save state for this project to .jig.json. + + Preserves other projects' state in the same file. + """ + path = self._config_dir / ".jig.json" + + # Load existing file to preserve other projects + try: + with open(path) as f: + all_data = json.load(f) + except FileNotFoundError: + all_data = {} + + # Update this project's state + project_data = {k: v for k, v in asdict(self).items() if not k.startswith("_")} + all_data[self._project_name] = project_data + + # Save back to file + with open(path, "w") as f: + json.dump(all_data, f, indent=2) + + +# == Status prettyprint utils == def _format_timestamp(ts: str | None) -> str: @@ -126,6 +412,7 @@ def format_deployment_status(d: Deployment) -> str: return status +# == Main CLI == # --- Helper Functions --- @@ -1028,7 +1315,7 @@ def list_deployments(ctx: click.Context) -> None: click.echo(json.dumps(response.json(), indent=2)) -# --- Secrets -- +# == Secrets == @click.group() @@ -1159,17 +1446,7 @@ def secrets_list( click.echo(f" - {name} [{status}]") -# --- Volumes --- -"""Volume management CLI commands for jig.""" - - -@click.group() -@click.pass_context -def volumes(ctx: click.Context) -> None: - """Manage volumes""" - pass - - +# == Volumes == # --- File upload --- @@ -1234,7 +1511,14 @@ async def _update_volume(client: Together, name: str, source: str) -> None: click.echo("\N{CHECK MARK} Volume updated successfully") -# --- CLI Commands --- +# --- Volumes CLI Commands --- + + +@click.group() +@click.pass_context +def volumes(ctx: click.Context) -> None: + """Manage volumes""" + pass @volumes.command("create") From 373d0914d9094f6222c3e438ec0fe93a0012f540 Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 17:59:40 -0500 Subject: [PATCH 07/51] start simplifying code - use Path.read_text and more walrus --- src/together/lib/cli/api/beta/jig/__init__.py | 19 ++--- src/together/lib/cli/api/beta/jig/jig.py | 75 ++++++++----------- 2 files changed, 41 insertions(+), 53 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/__init__.py b/src/together/lib/cli/api/beta/jig/__init__.py index b9ca42ca..6c7edc54 100644 --- a/src/together/lib/cli/api/beta/jig/__init__.py +++ b/src/together/lib/cli/api/beta/jig/__init__.py @@ -1,22 +1,23 @@ """Jig CLI - deployment tool for Together AI.""" import click + from together.lib.cli.api.beta.jig.jig import ( - build, - deploy, - destroy, - dockerfile, - endpoint, init, - job_status, - list_deployments, logs, push, - queue_status, - secrets, + build, + deploy, status, submit, + destroy, + secrets, volumes, + 
endpoint, + dockerfile, + job_status, + queue_status, + list_deployments, ) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 7d9618ba..a066ecc8 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -2,31 +2,30 @@ from __future__ import annotations -import asyncio -import json import os -import shlex -import shutil -import subprocess import sys +import json import time +import shlex +import shutil import typing -from dataclasses import asdict, dataclass, field, is_dataclass -from datetime import datetime +import asyncio +import subprocess from enum import Enum -from itertools import groupby +from typing import TYPE_CHECKING, Any, Union, Callable, Optional from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, Optional, Union +from datetime import datetime +from itertools import groupby +from dataclasses import field, asdict, dataclass, is_dataclass from urllib.parse import urlparse import click + from together import Together from together._exceptions import APIStatusError from together.lib.cli.api._utils import handle_api_errors -from together.lib.cli.api.beta.jig._config import DEBUG, WARMUP_DEST, WARMUP_ENV_NAME, Config, State -from together.lib.cli.api.beta.jig._uploader import Uploader -from together.lib.cli.api.beta.jig._utils import format_deployment_status from together.types.beta.deployment import Deployment +from together.lib.cli.api.beta.jig._uploader import Uploader from together.types.beta.jig.queue_submit_response import QueueSubmitResponse if TYPE_CHECKING: @@ -277,19 +276,18 @@ def load(cls, config_dir: Path, project_name: str) -> State: """ path = config_dir / ".jig.json" try: - with open(path) as f: - all_data = json.load(f) - - # Check if this is the new nested structure (project_name as key) - if project_name in all_data and isinstance(all_data[project_name], dict): - # New structure: extract project-specific state - project_data = all_data[project_name] - return cls.from_dict(config_dir, project_name, **project_data) - # Secrets or volumes exist, but not yet migrated (don't care about registry base path) - if "secrets" in all_data or "volumes" in all_data: - return cls.from_dict(config_dir, project_name, **all_data) - # File exists but this project isn't in it yet - return cls(_config_dir=config_dir, _project_name=project_name) + all_data = json.loads(path.read_text()) + + # Check if this is the new nested structure (project_name as key) + if project_name in all_data and isinstance(all_data[project_name], dict): + # New structure: extract project-specific state + project_data = all_data[project_name] + return cls.from_dict(config_dir, project_name, **project_data) + # Secrets or volumes exist, but not yet migrated (don't care about registry base path) + if "secrets" in all_data or "volumes" in all_data: + return cls.from_dict(config_dir, project_name, **all_data) + # File exists but this project isn't in it yet + return cls(_config_dir=config_dir, _project_name=project_name) except FileNotFoundError: return cls(_config_dir=config_dir, _project_name=project_name) @@ -302,8 +300,7 @@ def save(self) -> None: # Load existing file to preserve other projects try: - with open(path) as f: - all_data = json.load(f) + all_data = json.loads(path.read_text()) except FileNotFoundError: all_data = {} @@ -324,7 +321,7 @@ def _format_timestamp(ts: str | None) -> str: try: return datetime.fromisoformat(ts.replace("Z", "+00:00")).strftime("%Y-%m-%d %H:%M:%S") except 
(ValueError, TypeError, AttributeError):
-        return timestamp_str or "-"
+        return ts or "-"
 
 
 def _image_tag(image: str | None) -> str:
@@ -439,12 +436,10 @@ def _generate_dockerfile(config: Config) -> str:
     apt-get clean && rm -rf /var/lib/apt/lists/*
 """
 
-    env = "\n".join(f"ENV {k}={v}" for k, v in config.image.environment.items())
-    if env:
+    if env := "\n".join(f"ENV {k}={v}" for k, v in config.image.environment.items()):
         env += "\n"
 
-    run = "\n".join(f"RUN {cmd}" for cmd in config.image.run)
-    if run:
+    if run := "\n".join(f"RUN {cmd}" for cmd in config.image.run):
         run += "\n"
 
     copy = "\n".join(f"COPY {file} {file}" for file in _get_files_to_copy(config))
@@ -524,9 +519,7 @@ def _dockerfile(config: Config) -> bool:
     dockerfile_path = Path(config.dockerfile)
 
     if dockerfile_path.exists():
-        with open(dockerfile_path) as f:
-            first_line = f.readline().strip()
-
+        first_line = dockerfile_path.read_text().split("\n")[0]
         if first_line != DOCKERFILE_MANAGED_MARKER:
             return False
 
@@ -552,8 +545,7 @@ def _get_image_with_digest(state: State, config: Config, tag: str = "latest") ->
         return image_name
     try:
         cmd = ["docker", "inspect", "--format={{json .RepoDigests}}", image_name]
-        repo_digests = _run(cmd).stdout.strip()
-        if repo_digests and repo_digests != "null":
+        if (repo_digests := _run(cmd).stdout.strip()) and repo_digests != "null":
             registry = image_name.rsplit("/", 2)[0]
             for digest in json.loads(repo_digests):
                 if digest.startswith(registry):
@@ -606,13 +598,8 @@ def _ensure_registry_base_path(client: Together, state: State) -> None:
         response = client._client.get("/image-repositories/base-path", headers=client.auth_headers)
         response.raise_for_status()
        data = response.json()
-        base_path = data["base-path"]
         # Strip protocol prefix - Docker tags don't support URLs
-        if base_path.startswith("https://"):
-            base_path = base_path[8:]
-        elif base_path.startswith("http://"):
-            base_path = base_path[7:]
-        state.registry_base_path = base_path
+        state.registry_base_path = data["base-path"].removeprefix("https://").removeprefix("http://")
         state.save()
 
 
@@ -716,7 +703,7 @@ def _is_volume_preload_done(event: Any) -> bool:
     return bool(event.volume_preload_completed_at)
 
 
-class ReplicaTrackingResult(Enum):
+class ReplicaTrackingResult(str, Enum):
     """Result of processing a single replica event."""
 
     CONTINUE = "continue"

From e7a0e9628fb53f32c7d610c5a5a7b40d860b4b8e Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 18:43:40 -0500
Subject: [PATCH 08/51] fix lints

---
 .../lib/cli/api/beta/jig/_uploader.py         | 20 +++++-----------
 src/together/lib/cli/api/beta/jig/jig.py      | 24 +++++++------------
 2 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/src/together/lib/cli/api/beta/jig/_uploader.py b/src/together/lib/cli/api/beta/jig/_uploader.py
index 58e28d38..94d3417a 100644
--- a/src/together/lib/cli/api/beta/jig/_uploader.py
+++ b/src/together/lib/cli/api/beta/jig/_uploader.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import os
 import time
 import asyncio
 import itertools
@@ -12,20 +13,13 @@
 import httpx
 
 from together import Together
-from together.lib.cli.api.beta.jig._config import (
-    DEBUG,
-    MAX_UPLOAD_RETRIES,
-    MULTIPART_THRESHOLD_MB,
-    MULTIPART_CHUNK_SIZE_MB,
-    UPLOAD_CONCURRENCY_LIMIT,
-)
 
+DEBUG = os.getenv("TOGETHER_DEBUG", "").strip()[:1] in ("y", "1", "t")
 
-@click.group()
-@click.pass_context
-def volumes(ctx: click.Context) -> None:
-    """Manage volumes"""
-    pass
+UPLOAD_CONCURRENCY_LIMIT = int(os.getenv("TOGETHER_UPLOAD_CONCURRENCY", "15"))
+MULTIPART_CHUNK_SIZE_MB = 
int(os.getenv("TOGETHER_MULTIPART_CHUNK_SIZE_MB", "20")) +MULTIPART_THRESHOLD_MB = int(os.getenv("TOGETHER_MULTIPART_THRESHOLD_MB", "100")) +MAX_UPLOAD_RETRIES = 3 # --- File upload --- @@ -283,5 +277,3 @@ async def upload_part(part_info: dict[str, Any], data: bytes) -> dict[str, Any]: completed_parts = await asyncio.gather(*tasks) return sorted(completed_parts, key=lambda x: x["part_number"]) - - diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index a066ecc8..a36150cb 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -28,14 +28,10 @@ from together.lib.cli.api.beta.jig._uploader import Uploader from together.types.beta.jig.queue_submit_response import QueueSubmitResponse -if TYPE_CHECKING: +if TYPE_CHECKING or sys.version_info < (3, 11): import tomli as tomllib else: - try: - import tomllib - except ImportError: - import tomli as tomllib - + import tomllib # Managed dockerfile marker - if this is the first line, jig will regenerate the file DOCKERFILE_MANAGED_MARKER = "# MANAGED BY JIG - Remove this line to prevent jig from overwriting this file" @@ -46,11 +42,6 @@ DEBUG = os.getenv("TOGETHER_DEBUG", "").strip()[:1] in ("y", "1", "t") -UPLOAD_CONCURRENCY_LIMIT = int(os.getenv("TOGETHER_UPLOAD_CONCURRENCY", "15")) -MULTIPART_CHUNK_SIZE_MB = int(os.getenv("TOGETHER_MULTIPART_CHUNK_SIZE_MB", "20")) -MULTIPART_THRESHOLD_MB = int(os.getenv("TOGETHER_MULTIPART_THRESHOLD_MB", "100")) -MAX_UPLOAD_RETRIES = 3 - # Warmup configuration (for torch compile cache) WARMUP_ENV_NAME = os.getenv("WARMUP_ENV_NAME", "TORCHINDUCTOR_CACHE_DIR") WARMUP_DEST = os.getenv("WARMUP_DEST", "torch_cache") @@ -316,12 +307,13 @@ def save(self) -> None: # == Status prettyprint utils == -def _format_timestamp(ts: str | None) -> str: +def _format_timestamp(timestamp: str | None) -> str: """Format ISO timestamp for display""" + t = timestamp or "-" try: - return datetime.fromisoformat(ts.replace("Z", "+00:00")).strftime("%Y-%m-%d %H:%M:%S") - except (ValueError, TypeError, AttributeError): - return ts or "-" + return datetime.fromisoformat(t.replace("Z", "+00:00")).strftime("%Y-%m-%d %H:%M:%S") + except (ValueError, TypeError): + return t def _image_tag(image: str | None) -> str: @@ -391,7 +383,7 @@ def format_deployment_status(d: Deployment) -> str: if d.replica_events: for replica in d.replica_events.values(): replica.image = replica.image or "-" - sorted_replicas = sorted(d.replica_events.items(), key=lambda item: item[1].image, reverse=True) + sorted_replicas = sorted(d.replica_events.items(), key=lambda item: item[1].image or "-", reverse=True) events_status = "\nReplica Events:\n" for image, group in groupby(sorted_replicas, key=lambda item: item[1].image): events_status += f"{_image_tag(image)}:\n" From d41ad315e53fc6af2fed3ecc0c45e3c4fd843d95 Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 18:44:52 -0500 Subject: [PATCH 09/51] move secret --- src/together/lib/cli/api/beta/jig/jig.py | 72 ++++++++++++------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index a36150cb..4498d27d 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -548,42 +548,6 @@ def _get_image_with_digest(state: State, config: Config, tag: str = "latest") -> raise RuntimeError(f"No registry digest found for {image_name}. 
Make sure the image was pushed to registry first.") -def _set_secret( - client: Together, - config: Config, - state: State, - name: str, - value: str, - description: str, -) -> None: - """Set secret for the deployment""" - deployment_secret_name = f"{config.model_name}-{name}" - - try: - client.beta.jig.secrets.retrieve(deployment_secret_name) - client.beta.jig.secrets.update( - deployment_secret_name, - name=deployment_secret_name, - description=description, - value=value, - ) - click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo("\N{ROCKET} Creating new secret") - client.beta.jig.secrets.create( - name=deployment_secret_name, - value=value, - description=description, - ) - click.echo(f"\N{CHECK MARK} Created secret: {name}") - else: - raise - - state.secrets[name] = deployment_secret_name - state.save() - - def _ensure_registry_base_path(client: Together, state: State) -> None: """Ensure registry base path is set in state""" if not state.registry_base_path: @@ -1297,6 +1261,42 @@ def list_deployments(ctx: click.Context) -> None: # == Secrets == +def _set_secret( + client: Together, + config: Config, + state: State, + name: str, + value: str, + description: str, +) -> None: + """Set secret for the deployment""" + deployment_secret_name = f"{config.model_name}-{name}" + + try: + client.beta.jig.secrets.retrieve(deployment_secret_name) + client.beta.jig.secrets.update( + deployment_secret_name, + name=deployment_secret_name, + description=description, + value=value, + ) + click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + click.echo("\N{ROCKET} Creating new secret") + client.beta.jig.secrets.create( + name=deployment_secret_name, + value=value, + description=description, + ) + click.echo(f"\N{CHECK MARK} Created secret: {name}") + else: + raise + + state.secrets[name] = deployment_secret_name + state.save() + + @click.group() @click.pass_context def secrets(ctx: click.Context) -> None: From 9ef7b5bcd60eeabaaa84dd90a117e6da977c5b98 Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 18:53:57 -0500 Subject: [PATCH 10/51] actually, move secrets and volumes before the main CLI --- src/together/lib/cli/api/beta/jig/jig.py | 1068 +++++++++++----------- 1 file changed, 518 insertions(+), 550 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 4498d27d..022135f5 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -401,137 +401,436 @@ def format_deployment_status(d: Deployment) -> str: return status -# == Main CLI == -# --- Helper Functions --- +# = Secrets and Volumes subcommands = +# == Secrets == -def _get_api_base_url(client: Together) -> str: - """Extract base URL (scheme://host) from client, stripping any path like /v1""" - parsed = urlparse(str(client.base_url)) - return f"{parsed.scheme}://{parsed.netloc}" +def _set_secret( + client: Together, + config: Config, + state: State, + name: str, + value: str, + description: str, +) -> None: + """Set secret for the deployment""" + deployment_secret_name = f"{config.model_name}-{name}" + try: + client.beta.jig.secrets.retrieve(deployment_secret_name) + client.beta.jig.secrets.update( + deployment_secret_name, + name=deployment_secret_name, + description=description, + value=value, + ) + click.echo(f"\N{CHECK MARK} Updated 
secret: '{name}'") + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + click.echo("\N{ROCKET} Creating new secret") + client.beta.jig.secrets.create( + name=deployment_secret_name, + value=value, + description=description, + ) + click.echo(f"\N{CHECK MARK} Created secret: {name}") + else: + raise -def _run(cmd: list[str]) -> subprocess.CompletedProcess[str]: - """Run process with defaults""" - return subprocess.run(cmd, capture_output=True, text=True, check=True) + state.secrets[name] = deployment_secret_name + state.save() -def _generate_dockerfile(config: Config) -> str: - """Generate Dockerfile from config""" - apt = "" - if config.image.system_packages: - sys_pkgs = " ".join(config.image.system_packages or []) - apt = f"""RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \\ - apt-get update && \\ - DEBIAN_FRONTEND=noninteractive \\ - apt-get install -y --no-install-recommends {sys_pkgs} && \\ - apt-get clean && rm -rf /var/lib/apt/lists/* -""" +@click.group() +@click.pass_context +def secrets(ctx: click.Context) -> None: + """Manage deployment secrets""" + pass - if env := "\n".join(f"ENV {k}={v}" for k, v in config.image.environment.items()): - env += "\n" - if run := "\n".join(f"RUN {cmd}" for cmd in config.image.run): - run += "\n" +@secrets.command("set") +@click.pass_context +@click.option("--name", required=True, help="Secret name") +@click.option("--value", required=True, help="Secret value") +@click.option("--description", default="", help="Secret description") +@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") +@handle_api_errors("Secrets") +def secrets_set( + ctx: click.Context, + name: str, + value: str, + description: str, + config_path: str | None, +) -> None: + """Set a secret (create or update)""" + client: Together = ctx.obj + config = Config.find(config_path) + state = State.load(config._path.parent, config.model_name) + _set_secret(client, config, state, name, value, description) - copy = "\n".join(f"COPY {file} {file}" for file in _get_files_to_copy(config)) - # Check if .git exists in current directory - if Path(".git").exists(): - git_version_cmd = 'RUN --mount=type=bind,source=.git,target=/git git --git-dir /git describe --tags --exact-match > VERSION || echo "0.0.0-dev" > VERSION' +@secrets.command("unset") +@click.pass_context +@click.option("--name", required=True, help="Secret name to remove") +@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") +@handle_api_errors("Secrets") +def secrets_unset( + ctx: click.Context, # noqa: ARG001 + name: str, + config_path: str | None, +) -> None: + """Remove a secret from both remote and local state""" + config = Config.find(config_path) + state = State.load(config._path.parent, config.model_name) + + if state.secrets.pop(name, ""): + state.save() + click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state") else: - git_version_cmd = 'RUN echo "0.0.0-dev" > VERSION' + click.echo(f"\N{CROSS MARK} Secret '{name}' is not set") - return f"""{DOCKERFILE_MANAGED_MARKER} -# Build stage -FROM python:{config.image.python_version} AS builder +@secrets.command("list") +@click.pass_context +@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") +@handle_api_errors("Secrets") +def secrets_list( + ctx: click.Context, + config_path: str | None, +) -> None: + """List all secrets with sync status""" + client: Together = ctx.obj + config = Config.find(config_path) 
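+    # local state (.jig.json) lives next to the config file and is keyed by project name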
+    state = State.load(config._path.parent, config.model_name)
+    _set_secret(client, config, state, name, value, description)
+
+
+@secrets.command("unset")
+@click.pass_context
+@click.option("--name", required=True, help="Secret name to remove")
+@click.option("-c", "--config", "config_path", default=None, help="Configuration file path")
+@handle_api_errors("Secrets")
+def secrets_unset(
+    ctx: click.Context,  # noqa: ARG001
+    name: str,
+    config_path: str | None,
+) -> None:
+    """Remove a secret from local state (the remote secret is kept)"""
+    config = Config.find(config_path)
+    state = State.load(config._path.parent, config.model_name)
+
+    if state.secrets.pop(name, ""):
+        state.save()
+        click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state")
+    else:
+        click.echo(f"\N{CROSS MARK} Secret '{name}' is not set")
+
+
+@secrets.command("list")
+@click.pass_context
+@click.option("-c", "--config", "config_path", default=None, help="Configuration file path")
+@handle_api_errors("Secrets")
+def secrets_list(
+    ctx: click.Context,
+    config_path: str | None,
+) -> None:
+    """List all secrets with sync status"""
+    client: Together = ctx.obj
+    config = Config.find(config_path)
+    state = State.load(config._path.parent, config.model_name)
+
+    prefix = f"{config.model_name}-"
+
+    # Get remote secrets for this deployment
+    remote_response = client.beta.jig.secrets.list()
+    remote_secrets: set[str] = set()
+
+    if hasattr(remote_response, "data") and remote_response.data:
+        for secret in remote_response.data:
+            secret_name = getattr(secret, "name", None)
+            if secret_name and secret_name.startswith(prefix):
+                # Strip prefix to get local name
+                remote_secrets.add(secret_name[len(prefix) :])
+
+    # Get local secrets
+    local_secrets = set(state.secrets.keys())
+
+    # Combine all secrets
+    all_secrets = local_secrets | remote_secrets
+
+    if not all_secrets:
+        click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{config.model_name}'")
+        return
+
+    click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{config.model_name}':")
+    click.echo()
+
+    for name in sorted(all_secrets):
+        in_local = name in local_secrets
+        in_remote = name in remote_secrets
+
+        if in_local and in_remote:
+            status = click.style("synced", fg="green")
+        elif in_local and not in_remote:
+            status = click.style("local only", fg="yellow")
+        else:  # in_remote and not in_local
+            status = click.style("remote only", fg="yellow")
+
+        click.echo(f" - {name} [{status}]")
+
+
+# == Volumes ==
+# --- File upload ---
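+# Create-volume flow: register the volume with source prefix "<name>/<dir-name>",
+# upload the directory contents under that prefix, and delete the volume again
+# if the upload fails, so no half-populated volume is left behind.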
+async def _create_volume(client: Together, name: str, source: str) -> None: + """Create a volume and upload files""" + source_path = Path(source) + if not source_path.exists(): + raise ValueError(f"Source path does not exist: {source}") + if not source_path.is_dir(): + raise ValueError(f"Source path must be a directory: {source}") - Logic: - - If no Dockerfile exists → generate and return True - - If Dockerfile exists without our marker → skip and return False (user-managed) - - If Dockerfile exists with marker but config is older → skip and return True (no-op) - - If Dockerfile exists with marker and config is newer → regenerate and return True - """ - dockerfile_path = Path(config.dockerfile) + source_prefix = f"{name}/{source_path.name}" - if dockerfile_path.exists(): - first_line = dockerfile_path.read_text().split("\n")[0] - if first_line != DOCKERFILE_MANAGED_MARKER: - return False + click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") + try: + volume_response = client.beta.jig.volumes.create( + name=name, + type="readOnly", + content={"type": "files", "source_prefix": source_prefix}, + ) + click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") + except Exception as e: + raise RuntimeError(f"Failed to create volume: {e}") from e - # Skip regeneration if config hasn't changed - if config._path and config._path.exists() and dockerfile_path.stat().st_mtime >= config._path.stat().st_mtime: - return True + try: + await Uploader(client).upload_files(source_path, volume_name=name) + except Exception as e: + click.echo(f"\N{CROSS MARK} Upload failed: {e}") + click.echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'") + try: + client.beta.jig.volumes.delete(name) + except Exception as cleanup_error: + click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") + raise - with open(dockerfile_path, "w") as f: - f.write(_generate_dockerfile(config)) - return True +async def _update_volume(client: Together, name: str, source: str) -> None: + """Update a volume and re-upload files""" + source_path = Path(source) + if not source_path.exists(): + raise ValueError(f"Source path does not exist: {source}") + if not source_path.is_dir(): + raise ValueError(f"Source path must be a directory: {source}") + try: + client.beta.jig.volumes.retrieve(name) + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + raise ValueError(f"Volume '{name}' does not exist") from e + raise -def _get_image(state: State, config: Config, tag: str = "latest") -> str: - """Get full image name""" - return f"{state.registry_base_path}/{config.model_name}:{tag}" + source_prefix = f"{name}/{source_path.name}" + click.echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") + await Uploader(client).upload_files(source_path, volume_name=name) -def _get_image_with_digest(state: State, config: Config, tag: str = "latest") -> str: - """Get full image name tagged with digest""" + click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") + client.beta.jig.volumes.update( + name, + content={"type": "files", "source_prefix": source_prefix}, + ) + click.echo("\N{CHECK MARK} Volume updated successfully") + + +# --- Volumes CLI Commands --- + + +@click.group() +@click.pass_context +def volumes(ctx: click.Context) -> None: + """Manage volumes""" + pass + + +@volumes.command("create") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@click.option("--source", 
required=True, help="Source directory path") +@handle_api_errors("Volumes") +def volumes_create( + ctx: click.Context, + name: str, + source: str, +) -> None: + """Create a volume and upload files""" + client: Together = ctx.obj + asyncio.run(_create_volume(client, name, source)) + + +@volumes.command("update") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@click.option("--source", required=True, help="New source directory path") +@handle_api_errors("Volumes") +def volumes_update( + ctx: click.Context, + name: str, + source: str, +) -> None: + """Update a volume and re-upload files""" + client: Together = ctx.obj + asyncio.run(_update_volume(client, name, source)) + + +@volumes.command("delete") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@handle_api_errors("Volumes") +def volumes_delete( + ctx: click.Context, + name: str, +) -> None: + """Delete a volume""" + client: Together = ctx.obj + + try: + client.beta.jig.volumes.delete(name) + click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + click.echo(f"\N{CROSS MARK} Volume '{name}' not found") + return + raise + + +@volumes.command("describe") +@click.pass_context +@click.option("--name", required=True, help="Volume name") +@handle_api_errors("Volumes") +def volumes_describe( + ctx: click.Context, + name: str, +) -> None: + """Describe a volume""" + client: Together = ctx.obj + + try: + response = client.beta.jig.volumes.with_raw_response.retrieve(name) + click.echo(json.dumps(response.json(), indent=2)) + except APIStatusError as e: + if hasattr(e, "status_code") and e.status_code == 404: + click.echo(f"\N{CROSS MARK} Volume '{name}' not found") + return + raise + + +@volumes.command("list") +@click.pass_context +@handle_api_errors("Volumes") +def volumes_list(ctx: click.Context) -> None: + """List all volumes""" + client: Together = ctx.obj + response = client.beta.jig.volumes.with_raw_response.list() + click.echo(json.dumps(response.json(), indent=2)) + + +# == Main CLI == +# --- Helper Functions --- + + +def _get_api_base_url(client: Together) -> str: + """Extract base URL (scheme://host) from client, stripping any path like /v1""" + parsed = urlparse(str(client.base_url)) + return f"{parsed.scheme}://{parsed.netloc}" + + +def _run(cmd: list[str]) -> subprocess.CompletedProcess[str]: + """Run process with defaults""" + return subprocess.run(cmd, capture_output=True, text=True, check=True) + + +def _generate_dockerfile(config: Config) -> str: + """Generate Dockerfile from config""" + apt = "" + if config.image.system_packages: + sys_pkgs = " ".join(config.image.system_packages or []) + apt = f"""RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \\ + apt-get update && \\ + DEBIAN_FRONTEND=noninteractive \\ + apt-get install -y --no-install-recommends {sys_pkgs} && \\ + apt-get clean && rm -rf /var/lib/apt/lists/* +""" + + if env := "\n".join(f"ENV {k}={v}" for k, v in config.image.environment.items()): + env += "\n" + + if run := "\n".join(f"RUN {cmd}" for cmd in config.image.run): + run += "\n" + + copy = "\n".join(f"COPY {file} {file}" for file in _get_files_to_copy(config)) + + # Check if .git exists in current directory + if Path(".git").exists(): + git_version_cmd = 'RUN --mount=type=bind,source=.git,target=/git git --git-dir /git describe --tags --exact-match > VERSION || echo "0.0.0-dev" > VERSION' + else: + git_version_cmd = 'RUN echo "0.0.0-dev" > VERSION' + 
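+    # Two-stage image: the full python image installs dependencies with uv,
+    # the slim runtime stage copies them over, and tini runs as PID 1 so
+    # signals reach the CMD process.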
+ return f"""{DOCKERFILE_MANAGED_MARKER} + +# Build stage +FROM python:{config.image.python_version} AS builder + +{apt} +# Grab UV to install python packages +COPY --from=ghcr.io/astral-sh/uv /uv /usr/local/bin/uv + +WORKDIR /app +COPY pyproject.toml . +RUN --mount=type=cache,target=/root/.cache/uv \\ + uv pip install --system --compile-bytecode . && \\ + (python -c "import sprocket" 2>/dev/null || (echo "sprocket not found in pyproject.toml, installing from pypi.together.ai..." && uv pip install --system --extra-index-url https://pypi.together.ai/ sprocket)) + +# Final stage - slim image +FROM python:{config.image.python_version}-slim + +{apt} +COPY --from=builder /usr/local/lib/python{config.image.python_version} /usr/local/lib/python{config.image.python_version} +COPY --from=builder /usr/local/bin /usr/local/bin + +# Tini for proper signal handling +COPY --from=krallin/ubuntu-tini:latest /usr/local/bin/tini /tini +ENTRYPOINT ["/tini", "--"] + +{env} +{run} +WORKDIR /app +{copy} +ENV DEPLOYMENT_NAME={config.model_name} +# this tag will set the X-Worker-Version header, used for rollout monitoring +{git_version_cmd} + +CMD {json.dumps(shlex.split(config.image.cmd))}""" + + +def _get_files_to_copy(config: Config) -> list[str]: + """Get list of files to copy""" + files = set(config.image.copy) + if config.image.auto_include_git: + try: + if _run(["git", "status", "--porcelain"]).stdout.strip(): + raise RuntimeError("Git repository has uncommitted changes: auto_include_git not allowed.") + git_files = _run(["git", "ls-files"]).stdout.strip().split("\n") + files.update(f for f in git_files if f and f != ".") + except subprocess.CalledProcessError: + pass + + if "." in files: + raise ValueError("Copying '.' is not allowed. Please enumerate specific files.") + + return sorted(files) + + +def _dockerfile(config: Config) -> bool: + """Generate Dockerfile if appropriate. + + Returns True if Dockerfile was generated, False if skipped (user-managed file exists). 
+ + Logic: + - If no Dockerfile exists → generate and return True + - If Dockerfile exists without our marker → skip and return False (user-managed) + - If Dockerfile exists with marker but config is older → skip and return True (no-op) + - If Dockerfile exists with marker and config is newer → regenerate and return True + """ + dockerfile_path = Path(config.dockerfile) + + if dockerfile_path.exists(): + first_line = dockerfile_path.read_text().split("\n")[0] + if first_line != DOCKERFILE_MANAGED_MARKER: + return False + + # Skip regeneration if config hasn't changed + if config._path and config._path.exists() and dockerfile_path.stat().st_mtime >= config._path.stat().st_mtime: + return True + + with open(dockerfile_path, "w") as f: + f.write(_generate_dockerfile(config)) + + return True + + +def _get_image(state: State, config: Config, tag: str = "latest") -> str: + """Get full image name""" + return f"{state.registry_base_path}/{config.model_name}:{tag}" + + +def _get_image_with_digest(state: State, config: Config, tag: str = "latest") -> str: + """Get full image name tagged with digest""" image_name = _get_image(state, config, tag) if tag != "latest": return image_name @@ -1033,14 +1332,7 @@ def deploy( env_vars.append({"name": "TOGETHER_API_BASE_URL", "value": _get_api_base_url(client)}) if "TOGETHER_API_KEY" not in state.secrets: - _set_secret( - client, - config, - state, - "TOGETHER_API_KEY", - client.api_key, - "Auth key for queue API", - ) + _set_secret(client, config, state, "TOGETHER_API_KEY", client.api_key, "Auth key for queue API") for name, secret_id in state.secrets.items(): env_vars.append({"name": name, "value_from_secret": secret_id}) @@ -1093,490 +1385,166 @@ def handle_create() -> Deployment: else: raise - if detach: - return response.model_dump() - - # Skip tracking if revision didn't change and not scaling up from zero - new_revision_id = _get_current_revision_id(response) - scaling_up = was_scaled_to_zero and response.min_replicas and response.min_replicas > 0 - if old_revision_id and old_revision_id == new_revision_id and not scaling_up: - return None - - return _track_deployment_progress(config.model_name, client) - - -@jig_command -@click.option("--json", "json_output", is_flag=True, help="Output raw JSON") -def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: - """Get deployment status""" - client: Together = ctx.obj - config = Config.find(config_path) - response = client.beta.jig.retrieve(config.model_name) - - if json_output: - click.echo(response.model_dump_json(indent=2)) - else: - click.echo(format_deployment_status(response)) - - -@jig_command -def endpoint(ctx: click.Context, config_path: str | None) -> None: - """Get deployment endpoint URL""" - client: Together = ctx.obj - config = Config.find(config_path) - click.echo(f"{_get_api_base_url(client)}/v1/deployment-request/{config.model_name}") - - -@jig_command -@click.option("--follow", is_flag=True, help="Follow log output") -def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: - """Get deployment logs""" - client: Together = ctx.obj - config = Config.find(config_path) - - if not follow: - response = client.beta.jig.retrieve_logs(config.model_name) - if hasattr(response, "lines") and response.lines: - for log_line in response.lines: - click.echo(log_line) - else: - click.echo("No logs available") - return - - # Stream logs using SDK streaming response - try: - with client.beta.jig.with_streaming_response.retrieve_logs(config.model_name) as 
streaming_response: - for line in streaming_response.iter_lines(): - if line: - for log_line in json.loads(line).get("lines", []): - click.echo(log_line) - except KeyboardInterrupt: - click.echo("\nStopped following logs") - except Exception as e: - click.echo(f"\nConnection ended: {e}") - - -@jig_command -def destroy(ctx: click.Context, config_path: str | None) -> None: - """Destroy deployment""" - client: Together = ctx.obj - config = Config.find(config_path) - client.beta.jig.destroy(config.model_name) - click.echo(f"\N{WASTEBASKET} Destroyed {config.model_name}") - - -@jig_command -@click.option("--prompt", default=None, help="Job prompt") -@click.option("--payload", default=None, help="Job payload JSON") -@click.option("--watch", is_flag=True, help="Watch job status until completion") -def submit( - ctx: click.Context, - prompt: str | None, - payload: str | None, - watch: bool, - config_path: str | None, -) -> None: - """Submit a job to the deployment""" - client: Together = ctx.obj - config = Config.find(config_path) - - if not prompt and not payload: - raise click.UsageError("Either --prompt or --payload required") - - raw_response = client.beta.jig.queue.with_raw_response.submit( - model=config.model_name, - payload=json.loads(payload) if payload else {"prompt": prompt}, - priority=1, - ) - - # Getting raw response and parsing ourselves here due to Stainless limitation with - # Pydantic aliases not handled correctly (both fields are present in the model) - submit_response = QueueSubmitResponse.model_validate_json(raw_response.read()) - - click.echo("\N{CHECK MARK} Submitted job") - click.echo(submit_response.model_dump_json(indent=2)) - - if not watch or not submit_response.request_id: - return - - click.echo(f"\nWatching job {submit_response.request_id}...") - last_status: str | None = None - while True: - try: - response = client.beta.jig.queue.retrieve( - model=config.model_name, - request_id=submit_response.request_id, - ) - current_status = response.status - if current_status != last_status: - click.echo(response.model_dump_json(indent=2)) - last_status = current_status - - if current_status in ["done", "failed", "finished", "error", "canceled"]: - if current_status != "done": - ctx.exit(1) - break - - time.sleep(1) - - except KeyboardInterrupt: - click.echo(f"\nStopped watching {submit_response.request_id}") - ctx.exit(130) - - -@jig_command -@click.option("--request-id", required=True, help="Job request ID") -def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None: - """Get status of a specific job""" - client: Together = ctx.obj - config = Config.find(config_path) - - response = client.beta.jig.queue.retrieve( - model=config.model_name, - request_id=request_id, - ) - click.echo(response.model_dump_json(indent=2)) - - -@jig_command -def queue_status(ctx: click.Context, config_path: str | None) -> None: - """Get queue metrics for the deployment""" - client: Together = ctx.obj - config = Config.find(config_path) - - response = client.beta.jig.queue.with_raw_response.metrics(model=config.model_name) - click.echo(json.dumps(response.json(), indent=2)) - - -@click.command("list") -@handle_api_errors("Jig") -@click.pass_context -def list_deployments(ctx: click.Context) -> None: - """List all deployments""" - client: Together = ctx.obj - response = client.beta.jig.with_raw_response.list() - click.echo(json.dumps(response.json(), indent=2)) - - -# == Secrets == - - -def _set_secret( - client: Together, - config: Config, - state: State, - name: str, - 
value: str, - description: str, -) -> None: - """Set secret for the deployment""" - deployment_secret_name = f"{config.model_name}-{name}" - - try: - client.beta.jig.secrets.retrieve(deployment_secret_name) - client.beta.jig.secrets.update( - deployment_secret_name, - name=deployment_secret_name, - description=description, - value=value, - ) - click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo("\N{ROCKET} Creating new secret") - client.beta.jig.secrets.create( - name=deployment_secret_name, - value=value, - description=description, - ) - click.echo(f"\N{CHECK MARK} Created secret: {name}") - else: - raise - - state.secrets[name] = deployment_secret_name - state.save() - - -@click.group() -@click.pass_context -def secrets(ctx: click.Context) -> None: - """Manage deployment secrets""" - pass - - -@secrets.command("set") -@click.pass_context -@click.option("--name", required=True, help="Secret name") -@click.option("--value", required=True, help="Secret value") -@click.option("--description", default="", help="Secret description") -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_set( - ctx: click.Context, - name: str, - value: str, - description: str, - config_path: str | None, -) -> None: - """Set a secret (create or update)""" - client: Together = ctx.obj - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - - deployment_secret_name = f"{config.model_name}-{name}" - - try: - client.beta.jig.secrets.retrieve(deployment_secret_name) - # Secret exists, update it - client.beta.jig.secrets.update( - deployment_secret_name, - name=deployment_secret_name, - description=description, - value=value, - ) - click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo("\N{ROCKET} Creating new secret") - client.beta.jig.secrets.create( - name=deployment_secret_name, - value=value, - description=description, - ) - click.echo(f"\N{CHECK MARK} Created secret: {name}") - else: - raise - - state.secrets[name] = deployment_secret_name - state.save() - - -@secrets.command("unset") -@click.pass_context -@click.option("--name", required=True, help="Secret name to remove") -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_unset( - ctx: click.Context, # noqa: ARG001 - name: str, - config_path: str | None, -) -> None: - """Remove a secret from both remote and local state""" - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - - if state.secrets.pop(name, ""): - state.save() - click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state") - else: - click.echo(f"\N{CROSS MARK} Secret '{name}' is not set") - - -@secrets.command("list") -@click.pass_context -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_list( - ctx: click.Context, - config_path: str | None, -) -> None: - """List all secrets with sync status""" - client: Together = ctx.obj - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - - prefix = f"{config.model_name}-" - - # Get remote secrets for this deployment - remote_response = client.beta.jig.secrets.list() - 
remote_secrets: set[str] = set() - - if hasattr(remote_response, "data") and remote_response.data: - for secret in remote_response.data: - secret_name = getattr(secret, "name", None) - if secret_name and secret_name.startswith(prefix): - # Strip prefix to get local name - remote_secrets.add(secret_name[len(prefix) :]) - - # Get local secrets - local_secrets = set(state.secrets.keys()) - - # Combine all secrets - all_secrets = local_secrets | remote_secrets - - if not all_secrets: - click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{config.model_name}'") - return + if detach: + return response.model_dump() - click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{config.model_name}':") - click.echo() + # Skip tracking if revision didn't change and not scaling up from zero + new_revision_id = _get_current_revision_id(response) + scaling_up = was_scaled_to_zero and response.min_replicas and response.min_replicas > 0 + if old_revision_id and old_revision_id == new_revision_id and not scaling_up: + return None - for name in sorted(all_secrets): - in_local = name in local_secrets - in_remote = name in remote_secrets + return _track_deployment_progress(config.model_name, client) - if in_local and in_remote: - status = click.style("synced", fg="green") - elif in_local and not in_remote: - status = click.style("local only", fg="yellow") - else: # in_remote and not in_local - status = click.style("remote only", fg="yellow") - click.echo(f" - {name} [{status}]") +@jig_command +@click.option("--json", "json_output", is_flag=True, help="Output raw JSON") +def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: + """Get deployment status""" + client: Together = ctx.obj + config = Config.find(config_path) + response = client.beta.jig.retrieve(config.model_name) + if json_output: + click.echo(response.model_dump_json(indent=2)) + else: + click.echo(format_deployment_status(response)) -# == Volumes == -# --- File upload --- +@jig_command +def endpoint(ctx: click.Context, config_path: str | None) -> None: + """Get deployment endpoint URL""" + client: Together = ctx.obj + config = Config.find(config_path) + click.echo(f"{_get_api_base_url(client)}/v1/deployment-request/{config.model_name}") -async def _create_volume(client: Together, name: str, source: str) -> None: - """Create a volume and upload files""" - source_path = Path(source) - if not source_path.exists(): - raise ValueError(f"Source path does not exist: {source}") - if not source_path.is_dir(): - raise ValueError(f"Source path must be a directory: {source}") - source_prefix = f"{name}/{source_path.name}" +@jig_command +@click.option("--follow", is_flag=True, help="Follow log output") +def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: + """Get deployment logs""" + client: Together = ctx.obj + config = Config.find(config_path) - click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") - try: - volume_response = client.beta.jig.volumes.create( - name=name, - type="readOnly", - content={"type": "files", "source_prefix": source_prefix}, - ) - click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") - except Exception as e: - raise RuntimeError(f"Failed to create volume: {e}") from e + if not follow: + response = client.beta.jig.retrieve_logs(config.model_name) + if hasattr(response, "lines") and response.lines: + for log_line in response.lines: + click.echo(log_line) + else: + click.echo("No logs available") + return + # 
Stream logs using SDK streaming response try: - await Uploader(client).upload_files(source_path, volume_name=name) + with client.beta.jig.with_streaming_response.retrieve_logs(config.model_name) as streaming_response: + for line in streaming_response.iter_lines(): + if line: + for log_line in json.loads(line).get("lines", []): + click.echo(log_line) + except KeyboardInterrupt: + click.echo("\nStopped following logs") except Exception as e: - click.echo(f"\N{CROSS MARK} Upload failed: {e}") - click.echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'") - try: - client.beta.jig.volumes.delete(name) - except Exception as cleanup_error: - click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") - raise + click.echo(f"\nConnection ended: {e}") -async def _update_volume(client: Together, name: str, source: str) -> None: - """Update a volume and re-upload files""" - source_path = Path(source) - if not source_path.exists(): - raise ValueError(f"Source path does not exist: {source}") - if not source_path.is_dir(): - raise ValueError(f"Source path must be a directory: {source}") +@jig_command +def destroy(ctx: click.Context, config_path: str | None) -> None: + """Destroy deployment""" + client: Together = ctx.obj + config = Config.find(config_path) + client.beta.jig.destroy(config.model_name) + click.echo(f"\N{WASTEBASKET} Destroyed {config.model_name}") - try: - client.beta.jig.volumes.retrieve(name) - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - raise ValueError(f"Volume '{name}' does not exist") from e - raise - source_prefix = f"{name}/{source_path.name}" +@jig_command +@click.option("--prompt", default=None, help="Job prompt") +@click.option("--payload", default=None, help="Job payload JSON") +@click.option("--watch", is_flag=True, help="Watch job status until completion") +def submit( + ctx: click.Context, + prompt: str | None, + payload: str | None, + watch: bool, + config_path: str | None, +) -> None: + """Submit a job to the deployment""" + client: Together = ctx.obj + config = Config.find(config_path) - click.echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") - await Uploader(client).upload_files(source_path, volume_name=name) + if not prompt and not payload: + raise click.UsageError("Either --prompt or --payload required") - click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") - client.beta.jig.volumes.update( - name, - content={"type": "files", "source_prefix": source_prefix}, + raw_response = client.beta.jig.queue.with_raw_response.submit( + model=config.model_name, + payload=json.loads(payload) if payload else {"prompt": prompt}, + priority=1, ) - click.echo("\N{CHECK MARK} Volume updated successfully") + # Getting raw response and parsing ourselves here due to Stainless limitation with + # Pydantic aliases not handled correctly (both fields are present in the model) + submit_response = QueueSubmitResponse.model_validate_json(raw_response.read()) -# --- Volumes CLI Commands --- - + click.echo("\N{CHECK MARK} Submitted job") + click.echo(submit_response.model_dump_json(indent=2)) -@click.group() -@click.pass_context -def volumes(ctx: click.Context) -> None: - """Manage volumes""" - pass + if not watch or not submit_response.request_id: + return + click.echo(f"\nWatching job {submit_response.request_id}...") + last_status: str | None = None + while True: + try: + response = client.beta.jig.queue.retrieve( + model=config.model_name, + 
request_id=submit_response.request_id, + ) + current_status = response.status + if current_status != last_status: + click.echo(response.model_dump_json(indent=2)) + last_status = current_status -@volumes.command("create") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@click.option("--source", required=True, help="Source directory path") -@handle_api_errors("Volumes") -def volumes_create( - ctx: click.Context, - name: str, - source: str, -) -> None: - """Create a volume and upload files""" - client: Together = ctx.obj - asyncio.run(_create_volume(client, name, source)) + if current_status in ["done", "failed", "finished", "error", "canceled"]: + if current_status != "done": + ctx.exit(1) + break + time.sleep(1) -@volumes.command("update") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@click.option("--source", required=True, help="New source directory path") -@handle_api_errors("Volumes") -def volumes_update( - ctx: click.Context, - name: str, - source: str, -) -> None: - """Update a volume and re-upload files""" - client: Together = ctx.obj - asyncio.run(_update_volume(client, name, source)) + except KeyboardInterrupt: + click.echo(f"\nStopped watching {submit_response.request_id}") + ctx.exit(130) -@volumes.command("delete") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") -def volumes_delete( - ctx: click.Context, - name: str, -) -> None: - """Delete a volume""" +@jig_command +@click.option("--request-id", required=True, help="Job request ID") +def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None: + """Get status of a specific job""" client: Together = ctx.obj + config = Config.find(config_path) - try: - client.beta.jig.volumes.delete(name) - click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo(f"\N{CROSS MARK} Volume '{name}' not found") - return - raise + response = client.beta.jig.queue.retrieve( + model=config.model_name, + request_id=request_id, + ) + click.echo(response.model_dump_json(indent=2)) -@volumes.command("describe") -@click.pass_context -@click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") -def volumes_describe( - ctx: click.Context, - name: str, -) -> None: - """Describe a volume""" +@jig_command +def queue_status(ctx: click.Context, config_path: str | None) -> None: + """Get queue metrics for the deployment""" client: Together = ctx.obj + config = Config.find(config_path) - try: - response = client.beta.jig.volumes.with_raw_response.retrieve(name) - click.echo(json.dumps(response.json(), indent=2)) - except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo(f"\N{CROSS MARK} Volume '{name}' not found") - return - raise + response = client.beta.jig.queue.with_raw_response.metrics(model=config.model_name) + click.echo(json.dumps(response.json(), indent=2)) -@volumes.command("list") +@click.command("list") +@handle_api_errors("Jig") @click.pass_context -@handle_api_errors("Volumes") -def volumes_list(ctx: click.Context) -> None: - """List all volumes""" +def list_deployments(ctx: click.Context) -> None: + """List all deployments""" client: Together = ctx.obj - response = client.beta.jig.volumes.with_raw_response.list() + response = client.beta.jig.with_raw_response.list() click.echo(json.dumps(response.json(), indent=2)) From 
8b35fed09ca2118649d4b85eb072b0d97b1c62fe Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 19:10:50 -0500
Subject: [PATCH 11/51] simplify secrets slightly

---
 src/together/lib/cli/api/beta/jig/jig.py | 30 ++++++++----------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py
index 022135f5..4823a7ec 100644
--- a/src/together/lib/cli/api/beta/jig/jig.py
+++ b/src/together/lib/cli/api/beta/jig/jig.py
@@ -463,10 +463,9 @@ def secrets_set(
     config_path: str | None,
 ) -> None:
     """Set a secret (create or update)"""
-    client: Together = ctx.obj
     config = Config.find(config_path)
     state = State.load(config._path.parent, config.model_name)
-    _set_secret(client, config, state, name, value, description)
+    _set_secret(ctx.obj, config, state, name, value, description)
 
 
 @secrets.command("unset")
@@ -505,31 +504,22 @@ def secrets_list(
 
     prefix = f"{config.model_name}-"
 
-    # Get remote secrets for this deployment
-    remote_response = client.beta.jig.secrets.list()
-    remote_secrets: set[str] = set()
-
-    if hasattr(remote_response, "data") and remote_response.data:
-        for secret in remote_response.data:
-            secret_name = getattr(secret, "name", None)
-            if secret_name and secret_name.startswith(prefix):
-                # Strip prefix to get local name
-                remote_secrets.add(secret_name[len(prefix) :])
-
-    # Get local secrets
     local_secrets = set(state.secrets.keys())
+    remote_secrets: set[str] = set()
+    # Get all remote secrets then filter for this deployment
+    for secret in client.beta.jig.secrets.list().data or []:
+        if (name := secret.name) and name.startswith(prefix):
+            # Strip prefix to get local name
+            remote_secrets.add(name.removeprefix(prefix))
 
-    # Combine all secrets
-    all_secrets = local_secrets | remote_secrets
-
-    if not all_secrets:
+    if not local_secrets and not remote_secrets:
         click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{config.model_name}'")
         return
 
     click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{config.model_name}':")
     click.echo()
 
-    for name in sorted(all_secrets):
+    for name in sorted(local_secrets | remote_secrets):
         in_local = name in local_secrets
         in_remote = name in remote_secrets
 
@@ -537,7 +527,7 @@
             status = click.style("synced", fg="green")
         elif in_local and not in_remote:
             status = click.style("local only", fg="yellow")
-        else:  # in_remote and not in_local
+        else:
             status = click.style("remote only", fg="yellow")
 
         click.echo(f"  - {name} [{status}]")

From 1c847e962c4c78ab6dc87957ea30d67a1480a7b1 Mon Sep 17 00:00:00 2001
From: technillogue
Date: Tue, 17 Feb 2026 19:16:27 -0500
Subject: [PATCH 12/51] rename warm image vars

---
 src/together/lib/cli/api/beta/jig/jig.py | 31 +++++++++++------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py
index 4823a7ec..27380c39 100644
--- a/src/together/lib/cli/api/beta/jig/jig.py
+++ b/src/together/lib/cli/api/beta/jig/jig.py
@@ -794,8 +794,8 @@ def _dockerfile(config: Config) -> bool:
     Logic:
     - If no Dockerfile exists → generate and return True
     - If Dockerfile exists without our marker → skip and return False (user-managed)
-    - If Dockerfile exists with marker but config is older → skip and return True (no-op)
-    - If Dockerfile exists with marker and config is newer → regenerate and return True
+    - Else, if the config is older than the Dockerfile → skip and return True (no-op)
+    - Else → regenerate and return True
     """
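
A standalone sketch of the four docstring branches above may help; every name in it (`should_write`, the `MARKER` string, the placeholder contents) is hypothetical rather than taken from the patch:

    from pathlib import Path

    MARKER = "# generated by jig"  # assumed marker comment; the real string lives elsewhere

    def should_write(dockerfile: Path, config: Path) -> bool:
        # True means "jig owns this Dockerfile", even when it skips rewriting it.
        if not dockerfile.exists():
            return True  # no Dockerfile yet: generate one
        if MARKER not in dockerfile.read_text():
            return False  # user-managed file: never overwrite
        if dockerfile.stat().st_mtime >= config.stat().st_mtime:
            return True  # Dockerfile is at least as new as the config: no-op
        dockerfile.write_text("# ...regenerated contents...")  # config changed: regenerate
        return True
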
dockerfile_path = Path(config.dockerfile) @@ -808,8 +808,7 @@ def _dockerfile(config: Config) -> bool: if config._path and config._path.exists() and dockerfile_path.stat().st_mtime >= config._path.stat().st_mtime: return True - with open(dockerfile_path, "w") as f: - f.write(_generate_dockerfile(config)) + dockerfile_path.write_text(_generate_dockerfile(config)) return True @@ -855,8 +854,6 @@ def _build_warm_image(base_image: str) -> None: The cache directory is mounted at /app/torch_cache and the user's code should set the appropriate env var (TORCHINDUCTOR_CACHE_DIR, TKCC_OUTPUT_DIR, etc.) to point there. """ - import os - cache_dir = Path(".") / WARMUP_DEST # Clean any existing cache try: @@ -870,17 +867,17 @@ def _build_warm_image(base_image: str) -> None: # Run container with GPU and RUN_AND_EXIT=1 # Mount current dir as /app so warmup_inputs can reference local weights # Mount cache dir for compile artifacts - warmup_cmd = ["docker", "run", "--rm", "--gpus", "all", "-e", "RUN_AND_EXIT=1"] - warmup_cmd.extend(["-e", f"{WARMUP_ENV_NAME}=/app/{WARMUP_DEST}"]) - warmup_cmd.extend(["-v", f"{Path.cwd().absolute()}:/app"]) + cmd = ["docker", "run", "--rm", "--gpus", "all", "-e", "RUN_AND_EXIT=1"] + cmd.extend(["-e", f"{WARMUP_ENV_NAME}=/app/{WARMUP_DEST}"]) + cmd.extend(["-v", f"{Path.cwd().absolute()}:/app"]) # if MODEL_PRELOAD_PATH is set, also mount that (e.g. ~/.cache/huggingface) if weights_path := os.getenv("MODEL_PRELOAD_PATH"): - warmup_cmd.extend(["-v", f"{weights_path}:{weights_path}"]) - warmup_cmd.extend(["-e", f"MODEL_PRELOAD_PATH={weights_path}"]) - warmup_cmd.append(base_image) + cmd.extend(["-v", f"{weights_path}:{weights_path}"]) + cmd.extend(["-e", f"MODEL_PRELOAD_PATH={weights_path}"]) + cmd.append(base_image) - click.echo(f"Running: {' '.join(warmup_cmd)}") - result = subprocess.run(warmup_cmd) + click.echo(f"Running: {' '.join(cmd)}") + result = subprocess.run(cmd) if result.returncode != 0: raise RuntimeError(f"Warmup failed with code {result.returncode}") @@ -899,10 +896,10 @@ def _build_warm_image(base_image: str) -> None: cache_dockerfile.write_text(dockerfile_content) click.echo("\N{PACKAGE} Building final image with cache...") - cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image] - cmd.extend(["-f", str(cache_dockerfile), "."]) + final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image] + final_cmd.extend(["-f", str(cache_dockerfile), "."]) - if subprocess.run(cmd).returncode != 0: + if subprocess.run(final_cmd).returncode != 0: cache_dockerfile.unlink(missing_ok=True) raise RuntimeError("Cache image build failed") cache_dockerfile.unlink(missing_ok=True) From b967cef7f20fdacc2cb55a16b2b95ad2de743bf0 Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 19:18:22 -0500 Subject: [PATCH 13/51] properly use 3.9 features --- src/together/lib/cli/api/beta/jig/jig.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 27380c39..04db3cce 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -12,7 +12,7 @@ import asyncio import subprocess from enum import Enum -from typing import TYPE_CHECKING, Any, Union, Callable, Optional +from typing import TYPE_CHECKING, Any, Union, Callable from pathlib import Path from datetime import datetime from itertools import groupby @@ -55,11 +55,11 @@ class ImageConfig: """Container image configuration from 
pyproject.toml""" python_version: str = "3.11" - system_packages: list[str] = field(default_factory=list[str]) - environment: dict[str, str] = field(default_factory=dict[str, str]) - run: list[str] = field(default_factory=list[str]) + system_packages: list[str] = field(default_factory=list) + environment: dict[str, str] = field(default_factory=dict) + run: list[str] = field(default_factory=list) cmd: str = "python app.py" - copy: list[str] = field(default_factory=list[str]) + copy: list[str] = field(default_factory=list) auto_include_git: bool = False @classmethod @@ -95,12 +95,12 @@ class DeployConfig: min_replicas: int = 1 max_replicas: int = 1 port: int = 8000 - environment_variables: dict[str, str] = field(default_factory=dict[str, str]) - command: Optional[list[str]] = None - autoscaling: dict[str, str] = field(default_factory=dict[str, str]) + environment_variables: dict[str, str] = field(default_factory=dict) + command: list[str] | None = None + autoscaling: dict[str, str] = field(default_factory=dict) health_check_path: str = "/health" termination_grace_period_seconds: int = 300 - volume_mounts: list[VolumeMount] = field(default_factory=list[VolumeMount]) + volume_mounts: list[VolumeMount] = field(default_factory=list) @classmethod def from_dict(cls, data: dict[str, Any]) -> DeployConfig: @@ -166,7 +166,7 @@ def __post_init__(self) -> None: raise click.UsageError(f"Invalid {self._path}: {err}") @classmethod - def find(cls, config_path: Optional[str] = None, init: bool = False) -> Config: + def find(cls, config_path: str | None = None, init: bool = False) -> Config: """Find specified config_path, pyproject.toml, or jig.toml""" if config_path: found_path = Path(config_path) @@ -1029,7 +1029,7 @@ def _process_replica_event( return ReplicaTrackingResult.CONTINUE -def _track_deployment_progress(deployment_name: str, client: Together) -> Optional[dict[str, Any]]: +def _track_deployment_progress(deployment_name: str, client: Together) -> dict[str, Any] | None: """Track deployment progress until ready or failed. 
Polls deployment status every 3 seconds until: @@ -1268,7 +1268,7 @@ def deploy( docker_args: str | None, existing_image: str | None, config_path: str | None, -) -> Optional[dict[str, Any]]: +) -> dict[str, Any] | None: """Deploy model""" client: Together = ctx.obj config = Config.find(config_path) From 250546b719f56a869d0152c264a27450d295ec6a Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 19:30:26 -0500 Subject: [PATCH 14/51] start simplifying deployment tracking --- src/together/lib/cli/api/beta/jig/jig.py | 37 +++++++++--------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 04db3cce..b1939462 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -24,7 +24,7 @@ from together import Together from together._exceptions import APIStatusError from together.lib.cli.api._utils import handle_api_errors -from together.types.beta.deployment import Deployment +from together.types.beta.deployment import Deployment, ReplicaEvents from together.lib.cli.api.beta.jig._uploader import Uploader from together.types.beta.jig.queue_submit_response import QueueSubmitResponse @@ -906,16 +906,15 @@ def _build_warm_image(base_image: str) -> None: click.echo("\N{CHECK MARK} Final image with cache built") -def _get_current_revision_id(deployment: Any) -> str: +def _get_current_revision_id(d: Deployment) -> str: """Extract current revision ID from deployment environment variables.""" - env_vars: list[Any] = deployment.environment_variables or [] - for env_var in env_vars: - if env_var.name == "TOGETHER_DEPLOYMENT_REVISION_ID": - return str(env_var.value) + for var in d.environment_variables or []: + if var.name == "TOGETHER_DEPLOYMENT_REVISION_ID": + return str(var.value) return "" -def _print_replica_failure(event: Any) -> None: +def _print_replica_failure(event: ReplicaEvents) -> None: """Print replica failure details.""" if event.replica_status_reason: click.echo(f" Reason: {event.replica_status_reason}") @@ -928,9 +927,8 @@ def _fetch_and_print_logs(client: Together, deployment_name: str, replica_id: st click.echo(f"\n--- Logs for {replica_id} ---") try: response = client.beta.jig.retrieve_logs(deployment_name, replica_id=replica_id) - if hasattr(response, "lines") and response.lines: - for log_line in response.lines: - click.echo(log_line) + for log_line in response.lines or []: + click.echo(log_line) else: click.echo("No logs available") except Exception as e: @@ -938,13 +936,6 @@ def _fetch_and_print_logs(client: Together, deployment_name: str, replica_id: st click.echo("--- End of logs ---\n") -def _is_volume_preload_done(event: Any) -> bool: - """Check if volume preload is complete or not applicable.""" - if not event.volume_preload_status: - return True # No volume preload - return bool(event.volume_preload_completed_at) - - class ReplicaTrackingResult(str, Enum): """Result of processing a single replica event.""" @@ -955,7 +946,7 @@ class ReplicaTrackingResult(str, Enum): def _process_replica_event( replica_id: str, - event: Any, + event: ReplicaEvents, states: set[str], replica_ready_wait_start: dict[str, float], ready_timeout: float, @@ -966,7 +957,7 @@ def _process_replica_event( Updates `states` and `replica_ready_wait_start` as side effects. 
""" - volume_done = _is_volume_preload_done(event) + volume_done = not event.volume_preload_status or bool(event.volume_preload_completed_at) # Track volume preload progress if event.volume_preload_status: @@ -1000,11 +991,9 @@ def _process_replica_event( # Check for stuck in Running state without becoming ready if event.replica_status == "Running" and volume_done: - if replica_id not in replica_ready_wait_start: - replica_ready_wait_start[replica_id] = time.time() - - wait_duration = time.time() - replica_ready_wait_start[replica_id] - if wait_duration > ready_timeout: + # If wait start time is not set, set it to now + wait_start = replica_ready_wait_start.setdefault(replica_id, time.time()) + if time.time() - wait_start > ready_timeout: click.echo( f"\N{CROSS MARK} [{replica_id}] Container is running but " f"not ready to serve requests after {ready_timeout} seconds" From b5f024b4b97a2d5c03dc89887ada11f814545de7 Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 19:52:05 -0500 Subject: [PATCH 15/51] fix some bugs and lints --- src/together/lib/cli/api/beta/jig/jig.py | 34 +++++++++++------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index b1939462..b296f229 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -146,7 +146,7 @@ def validate(value: Any, value_type: type, path: str = "") -> str | None: return None if not isinstance(value, value_type): - return f"{path}: expected {type(value).__name__}, got {value!r}" + return f"{path}: expected {value_type.__name__}, got {value!r}" # pyright: ignore return None @@ -209,7 +209,7 @@ def load(cls, data: dict[str, Any], path: Path) -> Config: else: jig_config = data if name := jig_config.get("name"): - tip = "update `name` in {path}" + tip = f"update `name` in {path}" else: name = path.resolve().parent.name tip = f"rename your folder or add `name` to {path}" @@ -242,8 +242,8 @@ class State: _config_dir: Path _project_name: str registry_base_path: str = "" - secrets: dict[str, str] = field(default_factory=dict[str, str]) - volumes: dict[str, str] = field(default_factory=dict[str, str]) + secrets: dict[str, str] = field(default_factory=dict) + volumes: dict[str, str] = field(default_factory=dict) @classmethod def from_dict(cls, config_dir: Path, project_name: str, **data: Any) -> State: @@ -709,7 +709,7 @@ def _generate_dockerfile(config: Config) -> str: """Generate Dockerfile from config""" apt = "" if config.image.system_packages: - sys_pkgs = " ".join(config.image.system_packages or []) + sys_pkgs = " ".join(config.image.system_packages) apt = f"""RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \\ apt-get update && \\ DEBIAN_FRONTEND=noninteractive \\ @@ -843,7 +843,7 @@ def _ensure_registry_base_path(client: Together, state: State) -> None: response.raise_for_status() data = response.json() # Strip protocol prefix - Docker tags don't support URLs - state.registry_base_path = data["base-path"].removeprefix("http://").removeprefix("http://") + state.registry_base_path = data["base-path"].removeprefix("http://").removeprefix("https://") state.save() @@ -926,9 +926,9 @@ def _fetch_and_print_logs(client: Together, deployment_name: str, replica_id: st """Fetch and print logs for a specific replica.""" click.echo(f"\n--- Logs for {replica_id} ---") try: - response = client.beta.jig.retrieve_logs(deployment_name, replica_id=replica_id) - for log_line in 
response.lines or []: - click.echo(log_line) + if lines := client.beta.jig.retrieve_logs(deployment_name, replica_id=replica_id).lines: + for line in lines: + click.echo(line) else: click.echo("No logs available") except Exception as e: @@ -1177,9 +1177,6 @@ def build( config_path: str | None, ) -> None: """Build container image""" - import os - import shlex as shlex_module - client: Together = ctx.obj config = Config.find(config_path) state = State.load(config._path.parent, config.model_name) @@ -1200,7 +1197,7 @@ def build( # Add extra docker args from flag or env extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "") if extra_args: - cmd.extend(shlex_module.split(extra_args)) + cmd.extend(shlex.split(extra_args)) if subprocess.run(cmd).returncode != 0: raise RuntimeError("Build failed") @@ -1403,18 +1400,17 @@ def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: config = Config.find(config_path) if not follow: - response = client.beta.jig.retrieve_logs(config.model_name) - if hasattr(response, "lines") and response.lines: - for log_line in response.lines: - click.echo(log_line) + if lines := client.beta.jig.retrieve_logs(config.model_name).lines: + for line in lines: + click.echo(line) else: click.echo("No logs available") return # Stream logs using SDK streaming response try: - with client.beta.jig.with_streaming_response.retrieve_logs(config.model_name) as streaming_response: - for line in streaming_response.iter_lines(): + with client.beta.jig.with_streaming_response.retrieve_logs(config.model_name) as stream: + for line in stream.iter_lines(): if line: for log_line in json.loads(line).get("lines", []): click.echo(log_line) From 45f0c320d541b46c1c9f619c048e3a3a8593775c Mon Sep 17 00:00:00 2001 From: technillogue Date: Tue, 17 Feb 2026 20:44:59 -0500 Subject: [PATCH 16/51] remove some dead code, fix a KeyError, factor out validate_source --- src/together/lib/cli/api/beta/jig/jig.py | 36 ++++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index b296f229..e78d0d33 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -104,10 +104,10 @@ class DeployConfig: @classmethod def from_dict(cls, data: dict[str, Any]) -> DeployConfig: - deploy_config = {k: v for k, v in data.items() if k in cls.__annotations__} - if isinstance((mounts := deploy_config.get("volume_mounts")), list): - deploy_config["volume_mounts"] = [VolumeMount.from_dict(vm) for vm in mounts] # pyright: ignore - return cls(**deploy_config) + cfg = {k: v for k, v in data.items() if k in cls.__annotations__} + if isinstance((mounts := cfg.get("volume_mounts")), list): + cfg["volume_mounts"] = [VolumeMount.from_dict(vm) for vm in mounts] # pyright: ignore + return cls(**cfg) def validate(value: Any, value_type: type, path: str = "") -> str | None: @@ -220,7 +220,7 @@ def load(cls, data: dict[str, Any], path: Path) -> Config: jig_config["deploy"]["autoscaling"] = autoscaling # Support volume_mounts at jig level (merge into deploy config) - jig_config["deploy"]["volume_mounts"] = jig_config.get("volume_mounts", []) + jig_config.setdefault("deploy", {})["volume_mounts"] = jig_config.get("volume_mounts", []) return cls( image=ImageConfig.from_dict(jig_config.get("image", {})), @@ -390,7 +390,7 @@ def format_deployment_status(d: Deployment) -> str: for replica_id, replica in group: events_status += f" {replica_id}: " if 
replica.volume_preload_status and not replica.volume_preload_completed_at: - events_status += f"Volume Preloading" + events_status += "Volume Preloading" else: events_status += f"{replica.replica_status}" if replica.replica_status == "Running": @@ -525,7 +525,7 @@ def secrets_list( if in_local and in_remote: status = click.style("synced", fg="green") - elif in_local and not in_remote: + elif in_local: status = click.style("local only", fg="yellow") else: status = click.style("remote only", fg="yellow") @@ -537,14 +537,17 @@ def secrets_list( # --- File upload --- +def _validate_source(p: Path) -> None: + if not p.exists(): + raise ValueError(f"Source path does not exist: {p}") + if not p.is_dir(): + raise ValueError(f"Source path must be a directory: {p}") + + async def _create_volume(client: Together, name: str, source: str) -> None: """Create a volume and upload files""" source_path = Path(source) - if not source_path.exists(): - raise ValueError(f"Source path does not exist: {source}") - if not source_path.is_dir(): - raise ValueError(f"Source path must be a directory: {source}") - + _validate_source(source_path) source_prefix = f"{name}/{source_path.name}" click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") @@ -573,11 +576,7 @@ async def _create_volume(client: Together, name: str, source: str) -> None: async def _update_volume(client: Together, name: str, source: str) -> None: """Update a volume and re-upload files""" source_path = Path(source) - if not source_path.exists(): - raise ValueError(f"Source path does not exist: {source}") - if not source_path.is_dir(): - raise ValueError(f"Source path must be a directory: {source}") - + validate_source(source_path) try: client.beta.jig.volumes.retrieve(name) except APIStatusError as e: @@ -1032,7 +1031,8 @@ def _track_deployment_progress(deployment_name: str, client: Together) -> dict[s start_time = time.time() printed_states: dict[str, set[str]] = {} # replica_id -> set of printed states - replica_ready_wait_start: dict[str, float] = {} # replica_id -> when we started waiting for ready + # replica_id -> when we started waiting for ready + replica_ready_wait_start: dict[str, float] = {} click.echo("\N{HOURGLASS WITH FLOWING SAND} Deployment in-progress...") From 1ecc5de8cd6954e562142b1e1f34e3ac119f4f3d Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 00:15:02 -0500 Subject: [PATCH 17/51] fix lints --- src/together/lib/cli/api/beta/jig/jig.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index e78d0d33..dfd3bdc1 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -7,6 +7,7 @@ import json import time import shlex +import types import shutil import typing import asyncio @@ -132,7 +133,7 @@ def validate(value: Any, value_type: type, path: str = "") -> str | None: return err return None - if origin is Union: + if origin is Union or origin is types.UnionType: if value is None or any(validate(value, a, path) is None for a in args if a is not type(None)): return None return f"{path}: expected {value_type}, got {type(value).__name__}" @@ -576,7 +577,7 @@ async def _create_volume(client: Together, name: str, source: str) -> None: async def _update_volume(client: Together, name: str, source: str) -> None: """Update a volume and re-upload files""" source_path = Path(source) - validate_source(source_path) + _validate_source(source_path) 
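
The `types.UnionType` comparison added in this lint pass (and guarded with getattr in the next patch, since the attribute does not exist on Python 3.9) can be exercised in isolation. A minimal sketch with the hypothetical name `is_union`, not SDK code:

    import types
    import typing

    def is_union(tp: object) -> bool:
        # typing.get_origin() returns typing.Union for Optional[X] and Union[X, Y],
        # but types.UnionType for PEP 604 unions such as `int | None` (Python 3.10+).
        origin = typing.get_origin(tp)
        return origin is typing.Union or origin is getattr(types, "UnionType", None)

    assert is_union(typing.Optional[int])
    assert not is_union(list)
    # on Python 3.10+ this also holds: is_union(int | None)
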
try: client.beta.jig.volumes.retrieve(name) except APIStatusError as e: @@ -914,7 +915,6 @@ def _get_current_revision_id(d: Deployment) -> str: def _print_replica_failure(event: ReplicaEvents) -> None: - """Print replica failure details.""" if event.replica_status_reason: click.echo(f" Reason: {event.replica_status_reason}") if event.replica_status_message: @@ -922,7 +922,6 @@ def _print_replica_failure(event: ReplicaEvents) -> None: def _fetch_and_print_logs(client: Together, deployment_name: str, replica_id: str) -> None: - """Fetch and print logs for a specific replica.""" click.echo(f"\n--- Logs for {replica_id} ---") try: if lines := client.beta.jig.retrieve_logs(deployment_name, replica_id=replica_id).lines: From 7fcf252a10e48d1933808170b7775b4edbd2d91f Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 00:40:44 -0500 Subject: [PATCH 18/51] remove defensive hasattr checks (16338 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 52 +++++++++--------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index dfd3bdc1..690511a5 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -133,7 +133,7 @@ def validate(value: Any, value_type: type, path: str = "") -> str | None: return err return None - if origin is Union or origin is types.UnionType: + if origin is Union or origin is getattr(types, "UnionType", None): if value is None or any(validate(value, a, path) is None for a in args if a is not type(None)): return None return f"{path}: expected {value_type}, got {type(value).__name__}" @@ -420,23 +420,15 @@ def _set_secret( try: client.beta.jig.secrets.retrieve(deployment_secret_name) client.beta.jig.secrets.update( - deployment_secret_name, - name=deployment_secret_name, - description=description, - value=value, + deployment_secret_name, name=deployment_secret_name, description=description, value=value ) click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo("\N{ROCKET} Creating new secret") - client.beta.jig.secrets.create( - name=deployment_secret_name, - value=value, - description=description, - ) - click.echo(f"\N{CHECK MARK} Created secret: {name}") - else: + if e.status_code != 404: raise + click.echo("\N{ROCKET} Creating new secret") + client.beta.jig.secrets.create(name=deployment_secret_name, value=value, description=description) + click.echo(f"\N{CHECK MARK} Created secret: {name}") state.secrets[name] = deployment_secret_name state.save() @@ -581,7 +573,7 @@ async def _update_volume(client: Together, name: str, source: str) -> None: try: client.beta.jig.volumes.retrieve(name) except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: + if e.status_code == 404: raise ValueError(f"Volume '{name}' does not exist") from e raise @@ -591,10 +583,7 @@ async def _update_volume(client: Together, name: str, source: str) -> None: await Uploader(client).upload_files(source_path, volume_name=name) click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") - client.beta.jig.volumes.update( - name, - content={"type": "files", "source_prefix": source_prefix}, - ) + client.beta.jig.volumes.update(name, content={"type": "files", "source_prefix": source_prefix}) click.echo("\N{CHECK MARK} Volume updated successfully") @@ -653,10 +642,9 @@ def volumes_delete( 
client.beta.jig.volumes.delete(name) click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo(f"\N{CROSS MARK} Volume '{name}' not found") - return - raise + if e.status_code != 404: + raise + click.echo(f"\N{CROSS MARK} Volume '{name}' not found") @volumes.command("describe") @@ -674,10 +662,9 @@ def volumes_describe( response = client.beta.jig.volumes.with_raw_response.retrieve(name) click.echo(json.dumps(response.json(), indent=2)) except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - click.echo(f"\N{CROSS MARK} Volume '{name}' not found") - return - raise + if e.status_code != 404: + raise + click.echo(f"\N{CROSS MARK} Volume '{name}' not found") @volumes.command("list") @@ -769,7 +756,7 @@ def _generate_dockerfile(config: Config) -> str: def _get_files_to_copy(config: Config) -> list[str]: - """Get list of files to copy""" + """Combine explicitly copied files with git files if requested and valid""" files = set(config.image.copy) if config.image.auto_include_git: try: @@ -1350,12 +1337,11 @@ def handle_create() -> Deployment: response = client.beta.jig.update(config.model_name, **deploy_data) click.echo("\N{CHECK MARK} Applied new deployment configuration") except APIStatusError as e: - if hasattr(e, "status_code") and e.status_code == 404: - old_revision_id = "" - was_scaled_to_zero = False - response = handle_create() - else: + if e.status_code != 404: raise + old_revision_id = "" + was_scaled_to_zero = False + response = handle_create() if detach: return response.model_dump() From 7165ba5c63d93535859d2187bdf53baf7692b859 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 01:11:20 -0500 Subject: [PATCH 19/51] generally use client = ctx.obj.beta.jig (16322 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 123 ++++++++++------------- 1 file changed, 55 insertions(+), 68 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 690511a5..8f98f60d 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -26,6 +26,7 @@ from together._exceptions import APIStatusError from together.lib.cli.api._utils import handle_api_errors from together.types.beta.deployment import Deployment, ReplicaEvents +from together.resources.beta.jig.jig import JigResource from together.lib.cli.api.beta.jig._uploader import Uploader from together.types.beta.jig.queue_submit_response import QueueSubmitResponse @@ -407,7 +408,7 @@ def format_deployment_status(d: Deployment) -> str: def _set_secret( - client: Together, + client: JigResource, config: Config, state: State, name: str, @@ -418,16 +419,14 @@ def _set_secret( deployment_secret_name = f"{config.model_name}-{name}" try: - client.beta.jig.secrets.retrieve(deployment_secret_name) - client.beta.jig.secrets.update( - deployment_secret_name, name=deployment_secret_name, description=description, value=value - ) + client.secrets.retrieve(deployment_secret_name) + client.secrets.update(deployment_secret_name, name=deployment_secret_name, description=description, value=value) click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") except APIStatusError as e: if e.status_code != 404: raise click.echo("\N{ROCKET} Creating new secret") - client.beta.jig.secrets.create(name=deployment_secret_name, value=value, description=description) + client.secrets.create(name=deployment_secret_name, value=value, 
description=description) click.echo(f"\N{CHECK MARK} Created secret: {name}") state.secrets[name] = deployment_secret_name @@ -491,7 +490,7 @@ def secrets_list( config_path: str | None, ) -> None: """List all secrets with sync status""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) state = State.load(config._path.parent, config.model_name) @@ -500,7 +499,7 @@ def secrets_list( local_secrets = set(state.secrets.keys()) remote_secrets: set[str] = set() # Get all remote secrets then filter for this deployment - for secret in client.beta.jig.secrets.list().data or []: + for secret in client.secrets.list().data or []: if (name := secret.name) and name.startswith(prefix): # Strip prefix to get local name remote_secrets.add(name.removeprefix(prefix)) @@ -537,7 +536,7 @@ def _validate_source(p: Path) -> None: raise ValueError(f"Source path must be a directory: {p}") -async def _create_volume(client: Together, name: str, source: str) -> None: +async def _create_volume(client: JigResource, name: str, source: str) -> None: """Create a volume and upload files""" source_path = Path(source) _validate_source(source_path) @@ -545,7 +544,7 @@ async def _create_volume(client: Together, name: str, source: str) -> None: click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") try: - volume_response = client.beta.jig.volumes.create( + volume_response = client.volumes.create( name=name, type="readOnly", content={"type": "files", "source_prefix": source_prefix}, @@ -555,23 +554,23 @@ async def _create_volume(client: Together, name: str, source: str) -> None: raise RuntimeError(f"Failed to create volume: {e}") from e try: - await Uploader(client).upload_files(source_path, volume_name=name) + await Uploader(client._client).upload_files(source_path, volume_name=name) except Exception as e: click.echo(f"\N{CROSS MARK} Upload failed: {e}") click.echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'") try: - client.beta.jig.volumes.delete(name) + client.volumes.delete(name) except Exception as cleanup_error: click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") raise -async def _update_volume(client: Together, name: str, source: str) -> None: +async def _update_volume(client: JigResource, name: str, source: str) -> None: """Update a volume and re-upload files""" source_path = Path(source) _validate_source(source_path) try: - client.beta.jig.volumes.retrieve(name) + client.volumes.retrieve(name) except APIStatusError as e: if e.status_code == 404: raise ValueError(f"Volume '{name}' does not exist") from e @@ -580,10 +579,10 @@ async def _update_volume(client: Together, name: str, source: str) -> None: source_prefix = f"{name}/{source_path.name}" click.echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") - await Uploader(client).upload_files(source_path, volume_name=name) + await Uploader(client._client).upload_files(source_path, volume_name=name) click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") - client.beta.jig.volumes.update(name, content={"type": "files", "source_prefix": source_prefix}) + client.volumes.update(name, content={"type": "files", "source_prefix": source_prefix}) click.echo("\N{CHECK MARK} Volume updated successfully") @@ -602,13 +601,9 @@ def volumes(ctx: click.Context) -> None: @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="Source directory path") @handle_api_errors("Volumes") 
-def volumes_create( - ctx: click.Context, - name: str, - source: str, -) -> None: +def volumes_create(ctx: click.Context, name: str, source: str) -> None: """Create a volume and upload files""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig asyncio.run(_create_volume(client, name, source)) @@ -617,13 +612,9 @@ def volumes_create( @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="New source directory path") @handle_api_errors("Volumes") -def volumes_update( - ctx: click.Context, - name: str, - source: str, -) -> None: +def volumes_update(ctx: click.Context, name: str, source: str) -> None: """Update a volume and re-upload files""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig asyncio.run(_update_volume(client, name, source)) @@ -631,15 +622,12 @@ def volumes_update( @click.pass_context @click.option("--name", required=True, help="Volume name") @handle_api_errors("Volumes") -def volumes_delete( - ctx: click.Context, - name: str, -) -> None: +def volumes_delete(ctx: click.Context, name: str) -> None: """Delete a volume""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig try: - client.beta.jig.volumes.delete(name) + client.volumes.delete(name) click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") except APIStatusError as e: if e.status_code != 404: @@ -656,10 +644,10 @@ def volumes_describe( name: str, ) -> None: """Describe a volume""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig try: - response = client.beta.jig.volumes.with_raw_response.retrieve(name) + response = client.volumes.with_raw_response.retrieve(name) click.echo(json.dumps(response.json(), indent=2)) except APIStatusError as e: if e.status_code != 404: @@ -672,8 +660,8 @@ def volumes_describe( @handle_api_errors("Volumes") def volumes_list(ctx: click.Context) -> None: """List all volumes""" - client: Together = ctx.obj - response = client.beta.jig.volumes.with_raw_response.list() + client: JigResource = ctx.obj.beta.jig + response = client.volumes.with_raw_response.list() click.echo(json.dumps(response.json(), indent=2)) @@ -908,10 +896,10 @@ def _print_replica_failure(event: ReplicaEvents) -> None: click.echo(f" Message: {event.replica_status_message}") -def _fetch_and_print_logs(client: Together, deployment_name: str, replica_id: str) -> None: +def _fetch_and_print_logs(client: JigResource, deployment_name: str, replica_id: str) -> None: click.echo(f"\n--- Logs for {replica_id} ---") try: - if lines := client.beta.jig.retrieve_logs(deployment_name, replica_id=replica_id).lines: + if lines := client.retrieve_logs(deployment_name, replica_id=replica_id).lines: for line in lines: click.echo(line) else: @@ -935,7 +923,7 @@ def _process_replica_event( states: set[str], replica_ready_wait_start: dict[str, float], ready_timeout: float, - client: Together, + client: JigResource, deployment_name: str, ) -> ReplicaTrackingResult: """Process a single replica event and return the tracking result. @@ -1003,7 +991,7 @@ def _process_replica_event( return ReplicaTrackingResult.CONTINUE -def _track_deployment_progress(deployment_name: str, client: Together) -> dict[str, Any] | None: +def _track_deployment_progress(deployment_name: str, client: JigResource) -> dict[str, Any] | None: """Track deployment progress until ready or failed. 
Polls deployment status every 3 seconds until: @@ -1024,7 +1012,7 @@ def _track_deployment_progress(deployment_name: str, client: Together) -> dict[s try: while time.time() - start_time < timeout: - deployment = client.beta.jig.retrieve(deployment_name) + deployment = client.retrieve(deployment_name) # Handle scale to zero - no replicas expected if deployment.min_replicas == 0 and deployment.desired_replicas == 0: @@ -1242,10 +1230,10 @@ def deploy( config_path: str | None, ) -> dict[str, Any] | None: """Deploy model""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) state = State.load(config._path.parent, config.model_name) - _ensure_registry_base_path(client, state) + _ensure_registry_base_path(ctx.obj, state) if existing_image: deployment_image = existing_image @@ -1288,10 +1276,10 @@ def deploy( deploy_data["command"] = config.deploy.command env_vars = [{"name": k, "value": v} for k, v in config.deploy.environment_variables.items()] - env_vars.append({"name": "TOGETHER_API_BASE_URL", "value": _get_api_base_url(client)}) + env_vars.append({"name": "TOGETHER_API_BASE_URL", "value": _get_api_base_url(ctx.obj)}) # refactor if "TOGETHER_API_KEY" not in state.secrets: - _set_secret(client, config, state, "TOGETHER_API_KEY", client.api_key, "Auth key for queue API") + _set_secret(client, config, state, "TOGETHER_API_KEY", ctx.obj.api_key, "Auth key for queue API") for name, secret_id in state.secrets.items(): env_vars.append({"name": name, "value_from_secret": secret_id}) @@ -1305,7 +1293,7 @@ def deploy( def handle_create() -> Deployment: click.echo("\N{ROCKET} Creating new deployment") try: - response = client.beta.jig.deploy(**deploy_data) + response = client.deploy(**deploy_data) click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}") return response except APIStatusError as e: @@ -1331,10 +1319,10 @@ def handle_create() -> Deployment: raise try: - existing = client.beta.jig.retrieve(config.model_name) + existing = client.retrieve(config.model_name) old_revision_id = _get_current_revision_id(existing) was_scaled_to_zero = existing.ready_replicas == 0 - response = client.beta.jig.update(config.model_name, **deploy_data) + response = client.update(config.model_name, **deploy_data) click.echo("\N{CHECK MARK} Applied new deployment configuration") except APIStatusError as e: if e.status_code != 404: @@ -1359,9 +1347,9 @@ def handle_create() -> Deployment: @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: """Get deployment status""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) - response = client.beta.jig.retrieve(config.model_name) + response = client.retrieve(config.model_name) if json_output: click.echo(response.model_dump_json(indent=2)) @@ -1373,19 +1361,18 @@ def status(ctx: click.Context, config_path: str | None, json_output: bool = Fals def endpoint(ctx: click.Context, config_path: str | None) -> None: """Get deployment endpoint URL""" client: Together = ctx.obj - config = Config.find(config_path) - click.echo(f"{_get_api_base_url(client)}/v1/deployment-request/{config.model_name}") + click.echo(f"{_get_api_base_url(client)}/v1/deployment-request/{Config.find(config_path).model_name}") @jig_command @click.option("--follow", is_flag=True, help="Follow log output") def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: """Get 
deployment logs""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) if not follow: - if lines := client.beta.jig.retrieve_logs(config.model_name).lines: + if lines := client.retrieve_logs(config.model_name).lines: for line in lines: click.echo(line) else: @@ -1394,7 +1381,7 @@ def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: # Stream logs using SDK streaming response try: - with client.beta.jig.with_streaming_response.retrieve_logs(config.model_name) as stream: + with client.with_streaming_response.retrieve_logs(config.model_name) as stream: for line in stream.iter_lines(): if line: for log_line in json.loads(line).get("lines", []): @@ -1408,9 +1395,9 @@ def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: @jig_command def destroy(ctx: click.Context, config_path: str | None) -> None: """Destroy deployment""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) - client.beta.jig.destroy(config.model_name) + client.destroy(config.model_name) click.echo(f"\N{WASTEBASKET} Destroyed {config.model_name}") @@ -1426,13 +1413,13 @@ def submit( config_path: str | None, ) -> None: """Submit a job to the deployment""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) if not prompt and not payload: raise click.UsageError("Either --prompt or --payload required") - raw_response = client.beta.jig.queue.with_raw_response.submit( + raw_response = client.queue.with_raw_response.submit( model=config.model_name, payload=json.loads(payload) if payload else {"prompt": prompt}, priority=1, @@ -1452,7 +1439,7 @@ def submit( last_status: str | None = None while True: try: - response = client.beta.jig.queue.retrieve( + response = client.queue.retrieve( model=config.model_name, request_id=submit_response.request_id, ) @@ -1477,10 +1464,10 @@ def submit( @click.option("--request-id", required=True, help="Job request ID") def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None: """Get status of a specific job""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) - response = client.beta.jig.queue.retrieve( + response = client.queue.retrieve( model=config.model_name, request_id=request_id, ) @@ -1490,10 +1477,10 @@ def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> @jig_command def queue_status(ctx: click.Context, config_path: str | None) -> None: """Get queue metrics for the deployment""" - client: Together = ctx.obj + client: JigResource = ctx.obj.beta.jig config = Config.find(config_path) - response = client.beta.jig.queue.with_raw_response.metrics(model=config.model_name) + response = client.queue.with_raw_response.metrics(model=config.model_name) click.echo(json.dumps(response.json(), indent=2)) @@ -1502,6 +1489,6 @@ def queue_status(ctx: click.Context, config_path: str | None) -> None: @click.pass_context def list_deployments(ctx: click.Context) -> None: """List all deployments""" - client: Together = ctx.obj - response = client.beta.jig.with_raw_response.list() + client: JigResource = ctx.obj.beta.jig + response = client.with_raw_response.list() click.echo(json.dumps(response.json(), indent=2)) From f6f61cbf421f556a5f4b8f45303ba3b43a38e82c Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 01:24:58 -0500 Subject: [PATCH 20/51] break out is_not_unique_error and inline 
handle_create in deploy (16300 tokens)
---
 src/together/lib/cli/api/beta/jig/jig.py | 56 ++++++++++++------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py
index 8f98f60d..77b7016d 100644
--- a/src/together/lib/cli/api/beta/jig/jig.py
+++ b/src/together/lib/cli/api/beta/jig/jig.py
@@ -1203,6 +1203,23 @@ def push(ctx: click.Context, tag: str, config_path: str | None) -> None:
     click.echo("\N{CHECK MARK} Pushed")
 
 
+def _is_not_unique_error(e: APIStatusError) -> bool:
+    # all errors:
+    # "min replicas cannot be greater than max replicas"
+    # "storage cannot be more than %d GB"
+    # "user does not have access to the specified image"
+    # "invalid mount_path: %s"
+    # "only one readOnly volume is allowed per deployment"
+    # "volume not found"
+    # gorm tx.Create(...).Save() err (internal server error?)
+    # "failed to add deployment reference" (failed to add deployment reference to secret or "Failed to delete secret metadata from database",)
+    # "failed to delete secret" ("Failed to delete secret metadata from database" in logs)
+    # "failed to delete deployment from kubernetes: %w"
+    # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes
+    error_message = error_body.get("error", "") if isinstance(e.body, dict) else ""
+    return "already exists" in error_message or "must be unique" in error_message
+
+
 @jig_command
 @click.option("--tag", default="latest", help="Image tag")
 @click.option("--build-only", is_flag=True, help="Build and push only")
@@ -1290,34 +1307,6 @@ def deploy(
         click.echo(json.dumps(deploy_data, indent=2))
     click.echo(f"Deploying model: {config.model_name}")
 
-    def handle_create() -> Deployment:
-        click.echo("\N{ROCKET} Creating new deployment")
-        try:
-            response = client.deploy(**deploy_data)
-            click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}")
-            return response
-        except APIStatusError as e:
-            # all errors:
-            # "min replicas cannot be greater than max replicas"
-            # "storage cannot be more than %d GB"
-            # "user does not have access to the specified image"
-            # "invalid mount_path: %s"
-            # "only one readOnly volume is allowed per deployment"
-            # "volume not found"
-            # gorm tx.Create(...).Save() err (internal server error?)
-            # "failed to add deployment reference" (failed to add deployment reference to secret or "Failed to delete secret metadata from database",)
-            # "failed to delete secret" ("Failed to delete secret metadata from database" in logs)
-            # "failed to delete deployment from kubernetes: %w"
-            # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes
-            error_body: Any = getattr(e, "body", None)
-            error_message = (  # pyright: ignore
-                error_body.get("error", "") if isinstance(error_body, dict) else ""  # pyright: ignore
-            )
-            if "already exists" in error_message or "must be unique" in error_message:
-                raise RuntimeError(f"Deployment name must be unique. 
Tip: {config._unique_name_tip}") from None - # TODO: helpful tips for more error cases - raise - try: existing = client.retrieve(config.model_name) old_revision_id = _get_current_revision_id(existing) @@ -1329,7 +1318,16 @@ def handle_create() -> Deployment: raise old_revision_id = "" was_scaled_to_zero = False - response = handle_create() + click.echo("\N{ROCKET} Creating new deployment") + try: + response = client.deploy(**deploy_data) + click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}") + return response + except APIStatusError as e: + if _is_not_unique_error(e): + raise RuntimeError(f"Deployment name must be unique. Tip: {config._unique_name_tip}") from None + # TODO: helpful tips for more error cases + raise if detach: return response.model_dump() From 1b691d058830fbfcfa909f694c1d1a6ed85fe528 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 01:31:38 -0500 Subject: [PATCH 21/51] don't set base url if it's the same as default (16312 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 77b7016d..2cdf492a 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -1292,8 +1292,10 @@ def deploy( if config.deploy.command: deploy_data["command"] = config.deploy.command + if (base_url := _get_api_base_url(ctx.obj)) != "https://api.together.ai": + config.deploy.environment_variables["TOGETHER_API_BASE_URL"] = base_url + env_vars = [{"name": k, "value": v} for k, v in config.deploy.environment_variables.items()] - env_vars.append({"name": "TOGETHER_API_BASE_URL", "value": _get_api_base_url(ctx.obj)}) # refactor if "TOGETHER_API_KEY" not in state.secrets: _set_secret(client, config, state, "TOGETHER_API_KEY", ctx.obj.api_key, "Auth key for queue API") From f2a276a95c039f40e4007b9382af8b2a70f18482 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 01:59:59 -0500 Subject: [PATCH 22/51] no intermediate variables for building jig status message, use Path consistently, use defaultdict for printed states (16236 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 38 ++++++++++-------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 2cdf492a..5e0d7339 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -17,6 +17,7 @@ from pathlib import Path from datetime import datetime from itertools import groupby +from collections import defaultdict from dataclasses import field, asdict, dataclass, is_dataclass from urllib.parse import urlparse @@ -337,21 +338,18 @@ def format_deployment_status(d: Deployment) -> str: ) if d.autoscaling: - autoscaling_status = ( + status += ( f"\n Autoscaling: {d.autoscaling.get('metric', 'N/A')} {d.autoscaling.get('target', 'N/A')}(target)\n" ) - status += autoscaling_status - replica_status = ( + status += ( "\n" f" Replicas:\n" f" {'Min/Max':<16}: {d.min_replicas}/{d.max_replicas}\n" f" {'Ready/Desired':<16}: {d.ready_replicas}/{d.desired_replicas}\n" ) - status += replica_status - - config_status = ( + status += ( f"\nConfiguration:\n" f" Port: {d.port}\n" f" Command: {d.command}\n" @@ -361,26 +359,24 @@ def format_deployment_status(d: Deployment) -> str: ) if d.gpu_count and d.gpu_type: - config_status += f" GPU: {d.gpu_count}x {d.gpu_type}\n" + status += f" GPU: 
{d.gpu_count}x {d.gpu_type}\n" if d.volumes: - config_status += f"\n Volumes:\n {'NAME':<28} MOUNT_PATH\n" + status += f"\n Volumes:\n {'NAME':<28} MOUNT_PATH\n" for vol in d.volumes: - config_status += f" {vol.name:<28} {vol.mount_path}\n" + status += f" {vol.name:<28} {vol.mount_path}\n" if d.environment_variables: secrets = [env for env in d.environment_variables if env.value_from_secret] env_vars = [env for env in d.environment_variables if not env.value_from_secret] if secrets: - config_status += f"\n Secrets: {[secret.name for secret in secrets]}\n" + status += f"\n Secrets: {[secret.name for secret in secrets]}\n" if env_vars: - config_status += f"\n Environment Variables:\n {'NAME':<40} VALUE\n" + status += f"\n Environment Variables:\n {'NAME':<40} VALUE\n" for env in env_vars: - config_status += f" {env.name:<40} {env.value}\n" - - status += config_status + status += f" {env.name:<40} {env.value}\n" if d.replica_events: for replica in d.replica_events.values(): @@ -829,7 +825,7 @@ def _build_warm_image(base_image: str) -> None: The cache directory is mounted at /app/torch_cache and the user's code should set the appropriate env var (TORCHINDUCTOR_CACHE_DIR, TKCC_OUTPUT_DIR, etc.) to point there. """ - cache_dir = Path(".") / WARMUP_DEST + cache_dir = Path(WARMUP_DEST) # Clean any existing cache try: shutil.rmtree(cache_dir) @@ -844,7 +840,7 @@ def _build_warm_image(base_image: str) -> None: # Mount cache dir for compile artifacts cmd = ["docker", "run", "--rm", "--gpus", "all", "-e", "RUN_AND_EXIT=1"] cmd.extend(["-e", f"{WARMUP_ENV_NAME}=/app/{WARMUP_DEST}"]) - cmd.extend(["-v", f"{Path.cwd().absolute()}:/app"]) + cmd.extend(["-v", f"{Path.cwd()}:/app"]) # if MODEL_PRELOAD_PATH is set, also mount that (e.g. ~/.cache/huggingface) if weights_path := os.getenv("MODEL_PRELOAD_PATH"): cmd.extend(["-v", f"{weights_path}:{weights_path}"]) @@ -1004,7 +1000,7 @@ def _track_deployment_progress(deployment_name: str, client: JigResource) -> dic ready_timeout = 120 # 2 minutes for Running without ready_since start_time = time.time() - printed_states: dict[str, set[str]] = {} # replica_id -> set of printed states + printed_states: dict[str, set[str]] = defaultdict(set) # replica_id -> set of printed states # replica_id -> when we started waiting for ready replica_ready_wait_start: dict[str, float] = {} @@ -1039,9 +1035,6 @@ def _track_deployment_progress(deployment_name: str, client: JigResource) -> dic continue for replica_id, event in relevant_replicas.items(): - if replica_id not in printed_states: - printed_states[replica_id] = set() - result = _process_replica_event( replica_id=replica_id, event=event, @@ -1216,8 +1209,8 @@ def _is_not_unique_error(e: APIStatusError) -> bool: # "failed to delete secret" ("Failed to delete secret metadata from database" in logs) # "failed to delete deployment from kubernetes: %w" # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes - error_message = error_body.get("error", "") if isinstance(e.body, dict) else "" - return "already exists" in error_message or "must be unique" in error_message + msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore + return "already exists" in msg @jig_command @@ -1324,7 +1317,6 @@ def deploy( try: response = client.deploy(**deploy_data) click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}") - return response except APIStatusError as e: if _is_not_unique_error(e): raise RuntimeError(f"Deployment name must be unique. 
Tip: {config._unique_name_tip}") from None From 4fa75b226599ef71ba0e4eb3ee1d474741a0bbf7 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 02:29:37 -0500 Subject: [PATCH 23/51] mostly use path.read_text/write_text instead of open (16226 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 5e0d7339..691e6f04 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -176,13 +176,13 @@ def find(cls, config_path: str | None = None, init: bool = False) -> Config: if not found_path.exists(): click.echo(f"ERROR: Configuration file not found: {config_path}", err=True) sys.exit(1) - return cls.load(tomllib.load(found_path.open("rb")), found_path) + return cls.load(tomllib.loads(found_path.read_text()), found_path) if (jigfile := Path("jig.toml")).exists(): - return cls.load(tomllib.load(jigfile.open("rb")), jigfile) + return cls.load(tomllib.loads(jigfile.read_text()), jigfile) if (pyproject_path := Path("pyproject.toml")).exists(): - data = tomllib.load(pyproject_path.open("rb")) + data = tomllib.loads(pyproject_path.read_text()) if "tool" in data and "jig" in data["tool"]: return cls.load(data, pyproject_path) @@ -1106,8 +1106,7 @@ def init() -> None: gpu_type = "h100-80gb" gpu_count = 1 """ - with open(pyproject, "w") as f: - f.write(content) + pyproject.write_text(content) click.echo("\N{CHECK MARK} Created pyproject.toml") click.echo(" Edit the configuration and run 'jig deploy'") @@ -1209,7 +1208,7 @@ def _is_not_unique_error(e: APIStatusError) -> bool: # "failed to delete secret" ("Failed to delete secret metadata from database" in logs) # "failed to delete deployment from kubernetes: %w" # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes - msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore + msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore return "already exists" in msg From 5113df80479273d07646999e21d1125222044068 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 03:29:54 -0500 Subject: [PATCH 24/51] pass cache dockerfile and api key to docker via stdin (16210 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 691e6f04..f0e6e0fa 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -676,6 +676,11 @@ def _run(cmd: list[str]) -> subprocess.CompletedProcess[str]: return subprocess.run(cmd, capture_output=True, text=True, check=True) +def _run_input(cmd: list[str], input: str) -> subprocess.CompletedProcess[str]: + """Run process with input""" + return subprocess.run(cmd, input=input, text=True) + + def _generate_dockerfile(config: Config) -> str: """Generate Dockerfile from config""" apt = "" @@ -860,20 +865,15 @@ def _build_warm_image(base_image: str) -> None: click.echo(f"\N{CHECK MARK} Warmup complete, {len(cache_files)} cache files generated") # Generate cache dockerfile - copy cache to same location used during warmup - cache_dockerfile = Path("Dockerfile.cache") - dockerfile_content = f"""FROM {base_image} + final_dockerfile = f"""FROM {base_image} COPY {cache_dir.name} /app/{WARMUP_DEST} ENV 
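For reference, the `-f -` argument used here tells `docker build` to read the Dockerfile from stdin, which is what lets this patch drop the temporary `Dockerfile.cache` file and its cleanup. A minimal standalone sketch of the pattern (the tag and Dockerfile body are placeholders, not the real warmup values):

    import subprocess

    # "-f -" = read the Dockerfile from standard input; no temp file on disk.
    dockerfile = "FROM alpine:3.20\nRUN echo warm\n"
    cmd = ["docker", "build", "--platform", "linux/amd64", "-t", "example:cache", "-f", "-", "."]
    if subprocess.run(cmd, input=dockerfile, text=True).returncode != 0:
        raise RuntimeError("build failed")

The same stdin trick is what `docker login --password-stdin` relies on later in this patch, keeping the API key out of shell history and the process list.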
{WARMUP_ENV_NAME}=/app/{WARMUP_DEST}""" - cache_dockerfile.write_text(dockerfile_content) click.echo("\N{PACKAGE} Building final image with cache...") - final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image] - final_cmd.extend(["-f", str(cache_dockerfile), "."]) + final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image, "-f", "-", "."] - if subprocess.run(final_cmd).returncode != 0: - cache_dockerfile.unlink(missing_ok=True) + if _run_input(final_cmd, input=final_dockerfile).returncode != 0: raise RuntimeError("Cache image build failed") - cache_dockerfile.unlink(missing_ok=True) click.echo("\N{CHECK MARK} Final image with cache built") @@ -1185,8 +1185,8 @@ def push(ctx: click.Context, tag: str, config_path: str | None) -> None: image = _get_image(state, config, tag) registry = state.registry_base_path.split("/")[0] - login_cmd = f"echo {client.api_key} | docker login {registry} --username user --password-stdin" - if subprocess.run(login_cmd, shell=True, capture_output=True).returncode != 0: + login_cmd = ["docker", "login", registry, "--username", "user", "--password-stdin"] + if _run_input(login_cmd, input=client.api_key).returncode != 0: raise RuntimeError("Registry login failed") click.echo(f"Pushing {image}") From 91810001307541a70d7e1da8eb0ac31380c33745 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 04:06:41 -0500 Subject: [PATCH 25/51] refactor into tracker class (16153 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 259 +++++++++++------------ 1 file changed, 123 insertions(+), 136 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index f0e6e0fa..b9da39aa 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -913,156 +913,143 @@ class ReplicaTrackingResult(str, Enum): FAILURE = "failure" -def _process_replica_event( - replica_id: str, - event: ReplicaEvents, - states: set[str], - replica_ready_wait_start: dict[str, float], - ready_timeout: float, - client: JigResource, - deployment_name: str, -) -> ReplicaTrackingResult: - """Process a single replica event and return the tracking result. +@dataclass +class Tracker: + client: JigResource + deployment_name: str - Updates `states` and `replica_ready_wait_start` as side effects. - """ - volume_done = not event.volume_preload_status or bool(event.volume_preload_completed_at) - - # Track volume preload progress - if event.volume_preload_status: - if "volume_preload_started" not in states: - click.echo(f"\N{PACKAGE} [{replica_id}] Preloading volume contents...") - states.add("volume_preload_started") - elif volume_done and "volume_preload_completed" not in states: - click.echo( - f"\N{CHECK MARK} [{replica_id}] Successfully preloaded volume contents. " - "Attaching the volume to the container..." 
- ) - states.add("volume_preload_completed") + poll_interval: int = 3 # seconds + timeout: int = 600 # 10 minutes + ready_timeout: int = 120 # 2 minutes for Running without ready_since - # Skip terminated replicas - if event.replica_status == "Terminated": - return ReplicaTrackingResult.CONTINUE + # replica_id -> set of printed states + printed_states: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set)) + # replica_id -> when we started waiting for ready + replica_wait_start: dict[str, float] = field(default_factory=lambda: defaultdict(time.time)) - # Check if ready - SUCCESS - if event.replica_status == "Running" and event.replica_ready_since: - click.echo(f"\N{CHECK MARK} [{replica_id}] Container is running and ready") - click.echo("\N{ROCKET} Deployment successful!") - click.echo("Note: Additional replicas may still be scaling up.") - return ReplicaTrackingResult.SUCCESS - - # Check for CrashLoopBackOff - if event.replica_status_reason == "CrashLoopBackOff": - click.echo(f"\N{CROSS MARK} [{replica_id}] Container is crash looping") - _print_replica_failure(event) - _fetch_and_print_logs(client, deployment_name, replica_id) - return ReplicaTrackingResult.FAILURE - - # Check for stuck in Running state without becoming ready - if event.replica_status == "Running" and volume_done: - # If wait start time is not set, set it to now - wait_start = replica_ready_wait_start.setdefault(replica_id, time.time()) - if time.time() - wait_start > ready_timeout: - click.echo( - f"\N{CROSS MARK} [{replica_id}] Container is running but " - f"not ready to serve requests after {ready_timeout} seconds" - ) - _print_replica_failure(event) - _fetch_and_print_logs(client, deployment_name, replica_id) - click.echo(f"Deployment '{deployment_name}' may still be in progress.") - return ReplicaTrackingResult.FAILURE + def track_deployment_progress(self) -> dict[str, Any] | None: + """Track deployment progress until ready or failed. - # Print status updates deduplicated by status + reason - # Skip all status updates while volume preload is in progress - if volume_done and event.replica_status_reason: - status_key = f"{event.replica_status}_{event.replica_status_reason}" - if status_key not in states: - states.add(status_key) - click.echo( - f"\N{HOURGLASS WITH FLOWING SAND} [{replica_id}] {event.replica_status}: {event.replica_status_reason}" - ) - if event.replica_status_message: - click.echo(f" {event.replica_status_message}") + Polls deployment status every 3 seconds until: + - Success: At least one replica with the latest revision has replica_ready_since set + - Failure: CrashLoopBackOff or Running without ready_since for > 2 minute + - Timeout: 10 minutes elapsed + """ + start_time = time.time() - return ReplicaTrackingResult.CONTINUE + click.echo("\N{HOURGLASS WITH FLOWING SAND} Deployment in-progress...") + try: + while time.time() - start_time < self.timeout: + deployment = self.client.retrieve(self.deployment_name) -def _track_deployment_progress(deployment_name: str, client: JigResource) -> dict[str, Any] | None: - """Track deployment progress until ready or failed. 
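A note on the `replica_wait_start` field declared above: `defaultdict(time.time)` uses `time.time` itself as the default factory, so the first lookup of a replica id stamps the current time and later lookups return the same stamp; this is what replaces the explicit `setdefault(replica_id, time.time())` in the removed code. A tiny demonstration:

    import time
    from collections import defaultdict

    wait_start: dict[str, float] = defaultdict(time.time)
    first = wait_start["replica-a"]           # first access records "now"
    time.sleep(0.01)
    assert wait_start["replica-a"] == first   # later accesses reuse the stamp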
+ # Handle scale to zero - no replicas expected + if deployment.min_replicas == 0 and deployment.desired_replicas == 0: + if str(deployment.status) == "ScaledToZero": + click.echo("\N{CHECK MARK} Deployment scaled to zero replicas") + return None + # Not yet scaled to zero, wait and retry + time.sleep(self.poll_interval) + continue - Polls deployment status every 3 seconds until: - - Success: At least one replica with the latest revision has replica_ready_since set - - Failure: CrashLoopBackOff or Running without ready_since for > 2 minute - - Timeout: 10 minutes elapsed - """ - poll_interval = 3 # seconds - timeout = 600 # 10 minutes - ready_timeout = 120 # 2 minutes for Running without ready_since + current_revision_id = _get_current_revision_id(deployment) - start_time = time.time() - printed_states: dict[str, set[str]] = defaultdict(set) # replica_id -> set of printed states - # replica_id -> when we started waiting for ready - replica_ready_wait_start: dict[str, float] = {} + replica_events = deployment.replica_events or {} - click.echo("\N{HOURGLASS WITH FLOWING SAND} Deployment in-progress...") + # Filter to replicas with matching revision + relevant_replicas = { + replica_id: event + for replica_id, event in replica_events.items() + if event.revision_id == current_revision_id + } - try: - while time.time() - start_time < timeout: - deployment = client.retrieve(deployment_name) - - # Handle scale to zero - no replicas expected - if deployment.min_replicas == 0 and deployment.desired_replicas == 0: - if str(deployment.status) == "ScaledToZero": - click.echo("\N{CHECK MARK} Deployment scaled to zero replicas") - return None - # Not yet scaled to zero, wait and retry - time.sleep(poll_interval) - continue - - current_revision_id = _get_current_revision_id(deployment) - - replica_events = deployment.replica_events or {} - - # Filter to replicas with matching revision - relevant_replicas = { - replica_id: event - for replica_id, event in replica_events.items() - if event.revision_id == current_revision_id - } - - if not relevant_replicas: - time.sleep(poll_interval) - continue - - for replica_id, event in relevant_replicas.items(): - result = _process_replica_event( - replica_id=replica_id, - event=event, - states=printed_states[replica_id], - replica_ready_wait_start=replica_ready_wait_start, - ready_timeout=ready_timeout, - client=client, - deployment_name=deployment_name, - ) + if not relevant_replicas: + time.sleep(self.poll_interval) + continue - if result == ReplicaTrackingResult.SUCCESS: - return None - if result == ReplicaTrackingResult.FAILURE: - raise SystemExit(1) + for replica_id, event in relevant_replicas.items(): + result = self.process_replica_event(replica_id=replica_id, event=event) - time.sleep(poll_interval) + if result == ReplicaTrackingResult.SUCCESS: + return None + if result == ReplicaTrackingResult.FAILURE: + raise SystemExit(1) - # Timeout reached - click.echo("\N{CROSS MARK} Deployment tracking timed out after 10 minutes") - click.echo(f"Deployment '{deployment_name}' may still be in progress.") - click.echo("Run 'jig status' to check current state.") - raise SystemExit(1) + time.sleep(self.poll_interval) - except KeyboardInterrupt: - click.echo("\n\N{WARNING SIGN} Deployment tracking interrupted") - click.echo(f"Deployment '{deployment_name}' may still be in progress.") - click.echo("Run 'jig status' to check current state.") - raise SystemExit(130) from None + # Timeout reached + click.echo("\N{CROSS MARK} Deployment tracking timed out after 10 minutes") + 
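Stripped of the replica bookkeeping, `track_deployment_progress` is a poll-until-terminal loop with an overall deadline. A condensed sketch, where `check` is a hypothetical probe returning "ok", "fail", or None for keep-waiting:

    import time

    def poll(check, timeout: float = 600, interval: float = 3) -> None:
        start = time.time()
        while time.time() - start < timeout:
            result = check()
            if result == "ok":
                return
            if result == "fail":
                raise SystemExit(1)
            time.sleep(interval)
        raise SystemExit(1)  # deadline hit, like the 10-minute tracking timeout

    poll(iter([None, None, "ok"]).__next__, interval=0)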
click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") + click.echo("Run 'jig status' to check current state.") + raise SystemExit(1) + + except KeyboardInterrupt: + click.echo("\n\N{WARNING SIGN} Deployment tracking interrupted") + click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") + click.echo("Run 'jig status' to check current state.") + raise SystemExit(130) from None + + def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> ReplicaTrackingResult: + """Process a single replica event and return the tracking result.""" + states = self.printed_states[replica_id] + + volume_done = not event.volume_preload_status or bool(event.volume_preload_completed_at) + # Track volume preload progress + if event.volume_preload_status: + if "volume_preload_started" not in states: + click.echo(f"\N{PACKAGE} [{replica_id}] Preloading volume contents...") + states.add("volume_preload_started") + elif volume_done and "volume_preload_completed" not in states: + click.echo( + f"\N{CHECK MARK} [{replica_id}] Successfully preloaded volume contents. " + "Attaching the volume to the container..." + ) + states.add("volume_preload_completed") + + # Skip terminated replicas + if event.replica_status == "Terminated": + return ReplicaTrackingResult.CONTINUE + + # Check if ready - SUCCESS + if event.replica_status == "Running" and event.replica_ready_since: + click.echo(f"\N{CHECK MARK} [{replica_id}] Container is running and ready") + click.echo("\N{ROCKET} Deployment successful!") + click.echo("Note: Additional replicas may still be scaling up.") + return ReplicaTrackingResult.SUCCESS + + # Check for CrashLoopBackOff + if event.replica_status_reason == "CrashLoopBackOff": + click.echo(f"\N{CROSS MARK} [{replica_id}] Container is crash looping") + _print_replica_failure(event) + _fetch_and_print_logs(self.client, self.deployment_name, replica_id) + return ReplicaTrackingResult.FAILURE + + # Check for stuck in Running state without becoming ready + if event.replica_status == "Running" and volume_done: + # replica_wait_start will default to time.time() + if time.time() - self.replica_wait_start[replica_id] > self.ready_timeout: + click.echo( + f"\N{CROSS MARK} [{replica_id}] Container is running but " + f"not ready to serve requests after {self.ready_timeout} seconds" + ) + _print_replica_failure(event) + _fetch_and_print_logs(self.client, self.deployment_name, replica_id) + click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") + return ReplicaTrackingResult.FAILURE + + # Print status updates deduplicated by status + reason + # Skip all status updates while volume preload is in progress + if volume_done and event.replica_status_reason: + status_key = f"{event.replica_status}_{event.replica_status_reason}" + if status_key not in states: + states.add(status_key) + click.echo( + f"\N{HOURGLASS WITH FLOWING SAND} [{replica_id}] {event.replica_status}: {event.replica_status_reason}" + ) + if event.replica_status_message: + click.echo(f" {event.replica_status_message}") + + return ReplicaTrackingResult.CONTINUE # --- CLI Commands --- @@ -1331,7 +1318,7 @@ def deploy( if old_revision_id and old_revision_id == new_revision_id and not scaling_up: return None - return _track_deployment_progress(config.model_name, client) + return Tracker(client, config.model_name).track_deployment_progress() @jig_command From cbf29277f05444e99e3a51eb2ed4486d9b8c13a1 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 04:53:10 -0500 Subject: 
[PATCH 26/51] use some write_text, don't mutate replica.image, use load_config_state helper, merge run_input into run (16130 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 56 ++++++++++++------------ 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index b9da39aa..0123d14f 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -302,9 +302,7 @@ def save(self) -> None: project_data = {k: v for k, v in asdict(self).items() if not k.startswith("_")} all_data[self._project_name] = project_data - # Save back to file - with open(path, "w") as f: - json.dump(all_data, f, indent=2) + path.write_text(json.dumps(all_data, indent=2)) # == Status prettyprint utils == @@ -379,12 +377,10 @@ def format_deployment_status(d: Deployment) -> str: status += f" {env.name:<40} {env.value}\n" if d.replica_events: - for replica in d.replica_events.values(): - replica.image = replica.image or "-" sorted_replicas = sorted(d.replica_events.items(), key=lambda item: item[1].image or "-", reverse=True) events_status = "\nReplica Events:\n" - for image, group in groupby(sorted_replicas, key=lambda item: item[1].image): - events_status += f"{_image_tag(image)}:\n" + for image, group in groupby(sorted_replicas, key=lambda item: item[1].image or "-"): + events_status += f"{_image_tag(image or '-')}:\n" for replica_id, replica in group: events_status += f" {replica_id}: " if replica.volume_preload_status and not replica.volume_preload_completed_at: @@ -399,6 +395,17 @@ def format_deployment_status(d: Deployment) -> str: return status +# == Shared CLI helpers == + +config_option = click.option("-c", "--config", "config_path", default=None, help="Configuration file path") + + +def _load_config_state(config_path: str | None) -> tuple[Config, State]: + """Load config and state from config_path — shared by commands needing both.""" + config = Config.find(config_path) + return config, State.load(config._path.parent, config.model_name) + + # = Secrets and Volumes subcommands = # == Secrets == @@ -451,9 +458,8 @@ def secrets_set( config_path: str | None, ) -> None: """Set a secret (create or update)""" - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) - _set_secret(ctx.obj, config, state, name, value, description) + config, state = _load_config_state(config_path) + _set_secret(ctx.obj.beta.jig, config, state, name, value, description) @secrets.command("unset") @@ -467,8 +473,7 @@ def secrets_unset( config_path: str | None, ) -> None: """Remove a secret from both remote and local state""" - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) + config, state = _load_config_state(config_path) if state.secrets.pop(name, ""): state.save() @@ -487,8 +492,7 @@ def secrets_list( ) -> None: """List all secrets with sync status""" client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) + config, state = _load_config_state(config_path) prefix = f"{config.model_name}-" @@ -671,16 +675,13 @@ def _get_api_base_url(client: Together) -> str: return f"{parsed.scheme}://{parsed.netloc}" -def _run(cmd: list[str]) -> subprocess.CompletedProcess[str]: - """Run process with defaults""" +def _run(cmd: list[str], *, input: str | None = None) -> subprocess.CompletedProcess[str]: + """Run subprocess. 
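The merged `_run` keeps two behaviours behind one signature: with `input=` the child inherits stdout/stderr (so docker's own progress output stays visible), and without it output is captured and a non-zero exit raises via `check=True`. A self-contained usage sketch, assuming a Unix-like environment for `echo` and `cat`:

    import subprocess

    def _run(cmd: list[str], *, input: str | None = None) -> subprocess.CompletedProcess[str]:
        """Run subprocess. Captures output unless input is provided."""
        if input is not None:
            return subprocess.run(cmd, input=input, text=True)
        return subprocess.run(cmd, capture_output=True, text=True, check=True)

    print(_run(["echo", "hi"]).stdout, end="")  # captured and returned: "hi"
    _run(["cat"], input="fed via stdin\n")      # child prints straight to the terminal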
Captures output unless input is provided.""" + if input is not None: + return subprocess.run(cmd, input=input, text=True) return subprocess.run(cmd, capture_output=True, text=True, check=True) -def _run_input(cmd: list[str], input: str) -> subprocess.CompletedProcess[str]: - """Run process with input""" - return subprocess.run(cmd, input=input, text=True) - - def _generate_dockerfile(config: Config) -> str: """Generate Dockerfile from config""" apt = "" @@ -872,7 +873,7 @@ def _build_warm_image(base_image: str) -> None: click.echo("\N{PACKAGE} Building final image with cache...") final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image, "-f", "-", "."] - if _run_input(final_cmd, input=final_dockerfile).returncode != 0: + if _run(final_cmd, input=final_dockerfile).returncode != 0: raise RuntimeError("Cache image build failed") click.echo("\N{CHECK MARK} Final image with cache built") @@ -1131,8 +1132,7 @@ def build( ) -> None: """Build container image""" client: Together = ctx.obj - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) + config, state = _load_config_state(config_path) _ensure_registry_base_path(client, state) image = _get_image(state, config, tag) @@ -1165,15 +1165,14 @@ def build( def push(ctx: click.Context, tag: str, config_path: str | None) -> None: """Push image to registry""" client: Together = ctx.obj - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) + config, state = _load_config_state(config_path) _ensure_registry_base_path(client, state) image = _get_image(state, config, tag) registry = state.registry_base_path.split("/")[0] login_cmd = ["docker", "login", registry, "--username", "user", "--password-stdin"] - if _run_input(login_cmd, input=client.api_key).returncode != 0: + if _run(login_cmd, input=client.api_key).returncode != 0: raise RuntimeError("Registry login failed") click.echo(f"Pushing {image}") @@ -1227,8 +1226,7 @@ def deploy( ) -> dict[str, Any] | None: """Deploy model""" client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - state = State.load(config._path.parent, config.model_name) + config, state = _load_config_state(config_path) _ensure_registry_base_path(ctx.obj, state) if existing_image: From f25d3a82d511ff1b9d2454521c3b5f2a9ce12d8c Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 05:05:48 -0500 Subject: [PATCH 27/51] inline jig_command decorator and draft _jig_options instead (16314 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 59 +++++++++++++++++------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 0123d14f..5630fb6f 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -13,7 +13,7 @@ import asyncio import subprocess from enum import Enum -from typing import TYPE_CHECKING, Any, Union, Callable +from typing import TYPE_CHECKING, Any, Callable, Union from pathlib import Path from datetime import datetime from itertools import groupby @@ -1056,12 +1056,11 @@ def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> Replic # --- CLI Commands --- -# Shared CLI decorator: pass_context + config option + api error handling -def jig_command(f: Callable[..., Any]) -> Any: - f = click.option("-c", "--config", "config_path", default=None, help="Configuration file path")(f) +def _jig_options(f: Callable[..., Any]) -> Any: + 
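For context on the `_jig_options` helper taking shape here: click decorators are ordinary callables, so a shared bundle is just function composition applied innermost-first. A self-contained sketch of the pattern (the command and option names are illustrative, not the real jig ones):

    from typing import Any, Callable

    import click

    def common_options(f: Callable[..., Any]) -> Any:
        # composition order: the option wraps f first, then pass_context wraps that
        f = click.option("-c", "--config", "config_path", default=None)(f)
        f = click.pass_context(f)
        return f

    @click.command()
    @common_options
    @click.option("--tag", default="latest")
    def build(ctx: click.Context, tag: str, config_path: str | None) -> None:
        click.echo(f"tag={tag} config={config_path}")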
"""Bundles @click.pass_context + @handle_api_errors("Jig") + @config_option.""" + f = config_option(f) f = handle_api_errors("Jig")(f) f = click.pass_context(f) - f = click.command()(f) return f @@ -1100,7 +1099,7 @@ def init() -> None: @click.command() -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") +@config_option @handle_api_errors("Jig") def dockerfile(config_path: str | None) -> None: """Generate Dockerfile""" @@ -1115,7 +1114,8 @@ def dockerfile(config_path: str | None) -> None: ) -@jig_command +@click.command() +@_jig_options @click.option("--tag", default="latest", help="Image tag") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") @click.option( @@ -1160,7 +1160,10 @@ def build( _build_warm_image(image) -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option @click.option("--tag", default="latest", help="Image tag") def push(ctx: click.Context, tag: str, config_path: str | None) -> None: """Push image to registry""" @@ -1198,7 +1201,10 @@ def _is_not_unique_error(e: APIStatusError) -> bool: return "already exists" in msg -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option @click.option("--tag", default="latest", help="Image tag") @click.option("--build-only", is_flag=True, help="Build and push only") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") @@ -1319,7 +1325,10 @@ def deploy( return Tracker(client, config.model_name).track_deployment_progress() -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: """Get deployment status""" @@ -1333,14 +1342,20 @@ def status(ctx: click.Context, config_path: str | None, json_output: bool = Fals click.echo(format_deployment_status(response)) -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option def endpoint(ctx: click.Context, config_path: str | None) -> None: """Get deployment endpoint URL""" client: Together = ctx.obj click.echo(f"{_get_api_base_url(client)}/v1/deployment-request/{Config.find(config_path).model_name}") -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option @click.option("--follow", is_flag=True, help="Follow log output") def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: """Get deployment logs""" @@ -1368,7 +1383,10 @@ def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: click.echo(f"\nConnection ended: {e}") -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option def destroy(ctx: click.Context, config_path: str | None) -> None: """Destroy deployment""" client: JigResource = ctx.obj.beta.jig @@ -1377,7 +1395,10 @@ def destroy(ctx: click.Context, config_path: str | None) -> None: click.echo(f"\N{WASTEBASKET} Destroyed {config.model_name}") -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option @click.option("--prompt", default=None, help="Job prompt") @click.option("--payload", default=None, help="Job payload JSON") @click.option("--watch", is_flag=True, help="Watch job status until completion") @@ -1436,7 +1457,10 @@ def submit( ctx.exit(130) -@jig_command +@click.command() +@click.pass_context 
+@handle_api_errors("Jig") +@config_option @click.option("--request-id", required=True, help="Job request ID") def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None: """Get status of a specific job""" @@ -1450,7 +1474,10 @@ def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> click.echo(response.model_dump_json(indent=2)) -@jig_command +@click.command() +@click.pass_context +@handle_api_errors("Jig") +@config_option def queue_status(ctx: click.Context, config_path: str | None) -> None: """Get queue metrics for the deployment""" client: JigResource = ctx.obj.beta.jig From 3dff28c39a6df034cd2cdb1ee0c9d79452c85589 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 05:33:57 -0500 Subject: [PATCH 28/51] refactor build/push/deploy and query commands into a Jig class holding config/state. use _jig_options decorator (16481 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 657 +++++++++++------------ 1 file changed, 318 insertions(+), 339 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 5630fb6f..3ae565b1 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -13,7 +13,7 @@ import asyncio import subprocess from enum import Enum -from typing import TYPE_CHECKING, Any, Callable, Union +from typing import TYPE_CHECKING, Any, Union, Callable from pathlib import Path from datetime import datetime from itertools import groupby @@ -473,7 +473,7 @@ def secrets_unset( config_path: str | None, ) -> None: """Remove a secret from both remote and local state""" - config, state = _load_config_state(config_path) + _, state = _load_config_state(config_path) if state.secrets.pop(name, ""): state.save() @@ -790,40 +790,6 @@ def _dockerfile(config: Config) -> bool: return True -def _get_image(state: State, config: Config, tag: str = "latest") -> str: - """Get full image name""" - return f"{state.registry_base_path}/{config.model_name}:{tag}" - - -def _get_image_with_digest(state: State, config: Config, tag: str = "latest") -> str: - """Get full image name tagged with digest""" - image_name = _get_image(state, config, tag) - if tag != "latest": - return image_name - try: - cmd = ["docker", "inspect", "--format={{json .RepoDigests}}", image_name] - if (repo_digests := _run(cmd).stdout.strip()) and repo_digests != "null": - registry = image_name.rsplit("/", 2)[0] - for digest in json.loads(repo_digests): - if digest.startswith(registry): - return str(digest) - except subprocess.CalledProcessError as e: - msg = e.stderr.strip() if e.stderr else "Docker command failed" - raise RuntimeError(f"Failed to get digest for {image_name}: {msg}") from e - raise RuntimeError(f"No registry digest found for {image_name}. Make sure the image was pushed to registry first.") - - -def _ensure_registry_base_path(client: Together, state: State) -> None: - """Ensure registry base path is set in state""" - if not state.registry_base_path: - response = client._client.get("/image-repositories/base-path", headers=client.auth_headers) - response.raise_for_status() - data = response.json() - # Strip protocol prefix - Docker tags don't support URLs - state.registry_base_path = data["base-path"].removeprefix("http://").removeprefix("https://") - state.save() - - def _build_warm_image(base_image: str) -> None: """Run a warmup container to generate a cache, then rebuild with cache baked in. 
@@ -1053,6 +1019,296 @@ def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> Replic return ReplicaTrackingResult.CONTINUE +# --- Jig class: shared state + operations --- + + +def _is_not_unique_error(e: APIStatusError) -> bool: + # all errors: + # "min replicas cannot be greater than max replicas" + # "storage cannot be more than %d GB" + # "user does not have access to the specified image" + # "invalid mount_path: %s" + # "only one readOnly volume is allowed per deployment" + # "volume not found" + # gorm tx.Create(...).Save() err (internal server error?) + # "failed to add deployment reference" (failed to add deployment reference to secret or "Failed to delete secret metadata from database",) + # "failed to delete secret" ("Failed to delete secret metadata from database" in logs) + # "failed to delete deployment from kubernetes: %w" + # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes + msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore + return "already exists" in msg + + +class Jig: + """Holds Together client, config, and state. Methods implement the core jig operations.""" + + def __init__(self, client: Together, config_path: str | None = None) -> None: + self.together = client + self.jig: JigResource = client.beta.jig + self.config = Config.find(config_path) + self.state = State.load(self.config._path.parent, self.config.model_name) + + def _ensure_registry(self) -> None: + """Ensure registry base path is set in state""" + if not self.state.registry_base_path: + response = self.together._client.get("/image-repositories/base-path", headers=self.together.auth_headers) + response.raise_for_status() + data = response.json() + # Strip protocol prefix - Docker tags don't support URLs + self.state.registry_base_path = data["base-path"].removeprefix("http://").removeprefix("https://") + self.state.save() + + def _image(self, tag: str = "latest") -> str: + return f"{self.state.registry_base_path}/{self.config.model_name}:{tag}" + + def _image_with_digest(self, tag: str = "latest") -> str: + if tag != "latest": + return image_name + try: + cmd = ["docker", "inspect", "--format={{json .RepoDigests}}", image_name] + if (repo_digests := _run(cmd).stdout.strip()) and repo_digests != "null": + registry = image_name.rsplit("/", 2)[0] + for digest in json.loads(repo_digests): + if digest.startswith(registry): + return str(digest) + except subprocess.CalledProcessError as e: + msg = e.stderr.strip() if e.stderr else "Docker command failed" + raise RuntimeError(f"Failed to get digest for {image_name}: {msg}") from e + raise RuntimeError( + f"No registry digest found for {image_name}. Make sure the image was pushed to registry first." 
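For reference, `RepoDigests` in `docker inspect` output lists digest-pinned names of the form `registry/repo@sha256:…`, one per registry the image has been pushed to; `_image_with_digest` picks the one matching its own registry. A standalone sketch of that lookup (`registry.example.com/app:latest` is a placeholder; the image must exist locally and have been pushed):

    import json
    import subprocess

    image = "registry.example.com/app:latest"
    out = subprocess.run(
        ["docker", "inspect", "--format={{json .RepoDigests}}", image],
        capture_output=True, text=True, check=True,
    ).stdout.strip()
    registry = image.rsplit("/", 2)[0]  # "registry.example.com"
    # keep only the digest entry that belongs to our registry
    pinned = next(d for d in json.loads(out) if d.startswith(registry))
    print(pinned)  # registry.example.com/app@sha256:...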
+ ) + + # == Build / Push / Deploy == + + def build(self, tag: str = "latest", warmup: bool = False, docker_args: str | None = None) -> None: + self._ensure_registry() + image = self._image(tag) + + if _dockerfile(self.config): + click.echo("\N{CHECK MARK} Generated Dockerfile") + else: + click.echo(f"\N{INFORMATION SOURCE} Using existing {self.config.dockerfile} (not managed by jig)") + + click.echo(f"Building {image}") + cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image, "."] + if self.config.dockerfile != "Dockerfile": + cmd.extend(["-f", self.config.dockerfile]) + + extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "") + if extra_args: + cmd.extend(shlex.split(extra_args)) + if subprocess.run(cmd).returncode != 0: + raise RuntimeError("Build failed") + + click.echo("\N{CHECK MARK} Built") + + if warmup: + _build_warm_image(image) + + def push(self, tag: str = "latest") -> None: + self._ensure_registry() + image = self._image(tag) + + registry = self.state.registry_base_path.split("/")[0] + login_cmd = ["docker", "login", registry, "--username", "user", "--password-stdin"] + if _run(login_cmd, input=self.together.api_key).returncode != 0: + raise RuntimeError("Registry login failed") + + click.echo(f"Pushing {image}") + if subprocess.run(["docker", "push", image]).returncode != 0: + raise RuntimeError("Push failed") + click.echo("\N{CHECK MARK} Pushed") + + def _build_deploy_data(self, image: str) -> dict[str, Any]: + """Build the deployment API payload.""" + deploy_data: dict[str, Any] = { + "name": self.config.model_name, + "description": self.config.deploy.description, + "image": image, + "min_replicas": self.config.deploy.min_replicas, + "max_replicas": self.config.deploy.max_replicas, + "port": self.config.deploy.port, + "gpu_type": self.config.deploy.gpu_type, + "gpu_count": self.config.deploy.gpu_count, + "cpu": self.config.deploy.cpu, + "memory": self.config.deploy.memory, + "storage": self.config.deploy.storage, + "autoscaling": self.config.deploy.autoscaling, + "termination_grace_period_seconds": self.config.deploy.termination_grace_period_seconds, + "volumes": [asdict(vm) for vm in self.config.deploy.volume_mounts], + } + + if self.config.deploy.health_check_path: + deploy_data["health_check_path"] = self.config.deploy.health_check_path + if self.config.deploy.command: + deploy_data["command"] = self.config.deploy.command + + if (base_url := _get_api_base_url(self.together)) != "https://api.together.ai": + self.config.deploy.environment_variables["TOGETHER_API_BASE_URL"] = base_url + + env_vars = [{"name": k, "value": v} for k, v in self.config.deploy.environment_variables.items()] + + if "TOGETHER_API_KEY" not in self.state.secrets: + _set_secret( + self.jig, self.config, self.state, "TOGETHER_API_KEY", self.together.api_key, "Auth key for queue API" + ) + + for name, secret_id in self.state.secrets.items(): + env_vars.append({"name": name, "value_from_secret": secret_id}) + + deploy_data["environment_variables"] = env_vars + return deploy_data + + def deploy( + self, + tag: str = "latest", + build_only: bool = False, + warmup: bool = False, + detach: bool = False, + docker_args: str | None = None, + existing_image: str | None = None, + ) -> dict[str, Any] | None: + self._ensure_registry() + + if existing_image: + deployment_image = existing_image + else: + self.build(tag, warmup, docker_args) + self.push(tag) + deployment_image = self._image_with_digest(tag) + + if build_only: + click.echo("\N{CHECK MARK} Build complete (--build-only)") + 
return None + + deploy_data = self._build_deploy_data(deployment_image) + + if DEBUG: + click.echo(json.dumps(deploy_data, indent=2)) + click.echo(f"Deploying model: {self.config.model_name}") + + try: + existing = self.jig.retrieve(self.config.model_name) + old_revision_id = _get_current_revision_id(existing) + was_scaled_to_zero = existing.ready_replicas == 0 + response = self.jig.update(self.config.model_name, **deploy_data) + click.echo("\N{CHECK MARK} Applied new deployment configuration") + except APIStatusError as e: + if e.status_code != 404: + raise + old_revision_id = "" + was_scaled_to_zero = False + click.echo("\N{ROCKET} Creating new deployment") + try: + response = self.jig.deploy(**deploy_data) + click.echo(f"\N{CHECK MARK} Deployed: {self.config.model_name}") + except APIStatusError as e: + if _is_not_unique_error(e): + raise RuntimeError(f"Deployment name must be unique. Tip: {self.config._unique_name_tip}") from None + # TODO: helpful tips for more error cases + raise + + if detach: + return response.model_dump() + + new_revision_id = _get_current_revision_id(response) + scaling_up = was_scaled_to_zero and response.min_replicas and response.min_replicas > 0 + if old_revision_id and old_revision_id == new_revision_id and not scaling_up: + return None + + return Tracker(self.jig, self.config.model_name).track_deployment_progress() + + # == Query commands == + + def status(self, json_output: bool = False) -> None: + response = self.jig.retrieve(self.config.model_name) + if json_output: + click.echo(response.model_dump_json(indent=2)) + else: + click.echo(format_deployment_status(response)) + + def endpoint(self) -> None: + base = _get_api_base_url(self.together) + click.echo(f"{base}/v1/deployment-request/{self.config.model_name}") + + def logs(self, follow: bool = False) -> None: + if not follow: + if lines := self.jig.retrieve_logs(self.config.model_name).lines: + for line in lines: + click.echo(line) + else: + click.echo("No logs available") + return + + try: + with self.jig.with_streaming_response.retrieve_logs(self.config.model_name) as stream: + for line in stream.iter_lines(): + if line: + for log_line in json.loads(line).get("lines", []): + click.echo(log_line) + except KeyboardInterrupt: + click.echo("\nStopped following logs") + except Exception as e: + click.echo(f"\nConnection ended: {e}") + + def destroy(self) -> None: + self.jig.destroy(self.config.model_name) + click.echo(f"\N{WASTEBASKET} Destroyed {self.config.model_name}") + + def submit(self, prompt: str | None, payload: str | None, watch: bool) -> int | None: + """Submit a job. 
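The watch loop in `submit` reduces to: poll, echo only on status change, stop on a terminal state. A condensed sketch, where `get_status` is a stand-in for `jig.queue.retrieve(...).status`:

    import time

    TERMINAL = {"done", "failed", "finished", "error", "canceled"}

    def watch(get_status, interval: float = 1.0) -> str:
        last = None
        while True:
            status = get_status()
            if status != last:  # echo transitions, not every poll
                print(status)
                last = status
            if status in TERMINAL:
                return status
            time.sleep(interval)

    assert watch(iter(["queued", "running", "done"]).__next__, interval=0) == "done"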
Returns exit code if non-zero, else None.""" + if not prompt and not payload: + raise click.UsageError("Either --prompt or --payload required") + + raw_response = self.jig.queue.with_raw_response.submit( + model=self.config.model_name, + payload=json.loads(payload) if payload else {"prompt": prompt}, + priority=1, + ) + + # Raw response due to Stainless limitation with Pydantic aliases + submit_response = QueueSubmitResponse.model_validate_json(raw_response.read()) + + click.echo("\N{CHECK MARK} Submitted job") + click.echo(submit_response.model_dump_json(indent=2)) + + if not watch or not submit_response.request_id: + return None + + click.echo(f"\nWatching job {submit_response.request_id}...") + last_status: str | None = None + while True: + try: + response = self.jig.queue.retrieve( + model=self.config.model_name, + request_id=submit_response.request_id, + ) + current_status = response.status + if current_status != last_status: + click.echo(response.model_dump_json(indent=2)) + last_status = current_status + + if current_status in ["done", "failed", "finished", "error", "canceled"]: + return 1 if current_status != "done" else None + + time.sleep(1) + + except KeyboardInterrupt: + click.echo(f"\nStopped watching {submit_response.request_id}") + return 130 + + def job_status(self, request_id: str) -> None: + response = self.jig.queue.retrieve( + model=self.config.model_name, + request_id=request_id, + ) + click.echo(response.model_dump_json(indent=2)) + + def queue_status(self) -> None: + response = self.jig.queue.with_raw_response.metrics(model=self.config.model_name) + click.echo(json.dumps(response.json(), indent=2)) + + # --- CLI Commands --- @@ -1118,107 +1374,27 @@ def dockerfile(config_path: str | None) -> None: @_jig_options @click.option("--tag", default="latest", help="Image tag") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -@click.option( - "--docker-args", - default=None, - help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)", -) -def build( - ctx: click.Context, - tag: str, - warmup: bool, - docker_args: str | None, - config_path: str | None, -) -> None: +@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") +def build(ctx: click.Context, tag: str, warmup: bool, docker_args: str | None, config_path: str | None) -> None: """Build container image""" - client: Together = ctx.obj - config, state = _load_config_state(config_path) - _ensure_registry_base_path(client, state) - - image = _get_image(state, config, tag) - - if _dockerfile(config): - click.echo("\N{CHECK MARK} Generated Dockerfile") - else: - click.echo(f"\N{INFORMATION SOURCE} Using existing {config.dockerfile} (not managed by jig)") - - click.echo(f"Building {image}") - cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image, "."] - if config.dockerfile != "Dockerfile": - cmd.extend(["-f", config.dockerfile]) - - # Add extra docker args from flag or env - extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "") - if extra_args: - cmd.extend(shlex.split(extra_args)) - if subprocess.run(cmd).returncode != 0: - raise RuntimeError("Build failed") - - click.echo("\N{CHECK MARK} Built") - - if warmup: - _build_warm_image(image) + Jig(ctx.obj, config_path).build(tag, warmup, docker_args) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options @click.option("--tag", default="latest", help="Image tag") def push(ctx: click.Context, tag: str, 
config_path: str | None) -> None: """Push image to registry""" - client: Together = ctx.obj - config, state = _load_config_state(config_path) - _ensure_registry_base_path(client, state) - - image = _get_image(state, config, tag) - - registry = state.registry_base_path.split("/")[0] - login_cmd = ["docker", "login", registry, "--username", "user", "--password-stdin"] - if _run(login_cmd, input=client.api_key).returncode != 0: - raise RuntimeError("Registry login failed") - - click.echo(f"Pushing {image}") - if subprocess.run(["docker", "push", image]).returncode != 0: - raise RuntimeError("Push failed") - click.echo("\N{CHECK MARK} Pushed") - - -def _is_not_unique_error(e: APIStatusError) -> bool: - # all errors: - # "min replicas cannot be greater than max replicas" - # "storage cannot be more than %d GB" - # "user does not have access to the specified image" - # "invalid mount_path: %s" - # "only one readOnly volume is allowed per deployment" - # "volume not found" - # gorm tx.Create(...).Save() err (internal server error?) - # "failed to add deployment reference" (failed to add deployment reference to secret or "Failed to delete secret metadata from database",) - # "failed to delete secret" ("Failed to delete secret metadata from database" in logs) - # "failed to delete deployment from kubernetes: %w" - # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes - msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore - return "already exists" in msg + Jig(ctx.obj, config_path).push(tag) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options @click.option("--tag", default="latest", help="Image tag") @click.option("--build-only", is_flag=True, help="Build and push only") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -@click.option( - "--docker-args", - default=None, - help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)", -) -@click.option( - "--image", - "existing_image", - default=None, - help="Use existing image (skip build/push)", -) +@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") +@click.option("--image", "existing_image", default=None, help="Use existing image (skip build/push)") @click.option("--detach", "detach", is_flag=True, help="Do not wait for deployment to complete") def deploy( ctx: click.Context, @@ -1229,262 +1405,65 @@ def deploy( docker_args: str | None, existing_image: str | None, config_path: str | None, -) -> dict[str, Any] | None: +) -> None: """Deploy model""" - client: JigResource = ctx.obj.beta.jig - config, state = _load_config_state(config_path) - _ensure_registry_base_path(ctx.obj, state) - - if existing_image: - deployment_image = existing_image - else: - # Invoke build and push - ctx.invoke( - build, - tag=tag, - warmup=warmup, - docker_args=docker_args, - config_path=config_path, - ) - ctx.invoke(push, tag=tag, config_path=config_path) - deployment_image = _get_image_with_digest(state, config, tag) - - if build_only: - click.echo("\N{CHECK MARK} Build complete (--build-only)") - return None - - deploy_data: dict[str, Any] = { - "name": config.model_name, - "description": config.deploy.description, - "image": deployment_image, - "min_replicas": config.deploy.min_replicas, - "max_replicas": config.deploy.max_replicas, - "port": config.deploy.port, - "gpu_type": config.deploy.gpu_type, - "gpu_count": 
config.deploy.gpu_count, - "cpu": config.deploy.cpu, - "memory": config.deploy.memory, - "storage": config.deploy.storage, - "autoscaling": config.deploy.autoscaling, - "termination_grace_period_seconds": config.deploy.termination_grace_period_seconds, - "volumes": [asdict(vm) for vm in config.deploy.volume_mounts], - } - - if config.deploy.health_check_path: - deploy_data["health_check_path"] = config.deploy.health_check_path - if config.deploy.command: - deploy_data["command"] = config.deploy.command - - if (base_url := _get_api_base_url(ctx.obj)) != "https://api.together.ai": - config.deploy.environment_variables["TOGETHER_API_BASE_URL"] = base_url - - env_vars = [{"name": k, "value": v} for k, v in config.deploy.environment_variables.items()] - - if "TOGETHER_API_KEY" not in state.secrets: - _set_secret(client, config, state, "TOGETHER_API_KEY", ctx.obj.api_key, "Auth key for queue API") - - for name, secret_id in state.secrets.items(): - env_vars.append({"name": name, "value_from_secret": secret_id}) - - deploy_data["environment_variables"] = env_vars - - if DEBUG: - click.echo(json.dumps(deploy_data, indent=2)) - click.echo(f"Deploying model: {config.model_name}") - - try: - existing = client.retrieve(config.model_name) - old_revision_id = _get_current_revision_id(existing) - was_scaled_to_zero = existing.ready_replicas == 0 - response = client.update(config.model_name, **deploy_data) - click.echo("\N{CHECK MARK} Applied new deployment configuration") - except APIStatusError as e: - if e.status_code != 404: - raise - old_revision_id = "" - was_scaled_to_zero = False - click.echo("\N{ROCKET} Creating new deployment") - try: - response = client.deploy(**deploy_data) - click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}") - except APIStatusError as e: - if _is_not_unique_error(e): - raise RuntimeError(f"Deployment name must be unique. 
Tip: {config._unique_name_tip}") from None - # TODO: helpful tips for more error cases - raise - - if detach: - return response.model_dump() - - # Skip tracking if revision didn't change and not scaling up from zero - new_revision_id = _get_current_revision_id(response) - scaling_up = was_scaled_to_zero and response.min_replicas and response.min_replicas > 0 - if old_revision_id and old_revision_id == new_revision_id and not scaling_up: - return None - - return Tracker(client, config.model_name).track_deployment_progress() + Jig(ctx.obj, config_path).deploy(tag, build_only, warmup, detach, docker_args, existing_image) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: """Get deployment status""" - client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - response = client.retrieve(config.model_name) - - if json_output: - click.echo(response.model_dump_json(indent=2)) - else: - click.echo(format_deployment_status(response)) + Jig(ctx.obj, config_path).status(json_output) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options def endpoint(ctx: click.Context, config_path: str | None) -> None: """Get deployment endpoint URL""" - client: Together = ctx.obj - click.echo(f"{_get_api_base_url(client)}/v1/deployment-request/{Config.find(config_path).model_name}") + Jig(ctx.obj, config_path).endpoint() @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options @click.option("--follow", is_flag=True, help="Follow log output") def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: """Get deployment logs""" - client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - - if not follow: - if lines := client.retrieve_logs(config.model_name).lines: - for line in lines: - click.echo(line) - else: - click.echo("No logs available") - return - - # Stream logs using SDK streaming response - try: - with client.with_streaming_response.retrieve_logs(config.model_name) as stream: - for line in stream.iter_lines(): - if line: - for log_line in json.loads(line).get("lines", []): - click.echo(log_line) - except KeyboardInterrupt: - click.echo("\nStopped following logs") - except Exception as e: - click.echo(f"\nConnection ended: {e}") + Jig(ctx.obj, config_path).logs(follow) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options def destroy(ctx: click.Context, config_path: str | None) -> None: """Destroy deployment""" - client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - client.destroy(config.model_name) - click.echo(f"\N{WASTEBASKET} Destroyed {config.model_name}") + Jig(ctx.obj, config_path).destroy() @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options @click.option("--prompt", default=None, help="Job prompt") @click.option("--payload", default=None, help="Job payload JSON") @click.option("--watch", is_flag=True, help="Watch job status until completion") -def submit( - ctx: click.Context, - prompt: str | None, - payload: str | None, - watch: bool, - config_path: str | None, -) -> None: +def submit(ctx: click.Context, prompt: str | None, payload: str | None, watch: bool, config_path: str | None) -> None: """Submit a job to the deployment""" - client: JigResource = 
ctx.obj.beta.jig - config = Config.find(config_path) - - if not prompt and not payload: - raise click.UsageError("Either --prompt or --payload required") - - raw_response = client.queue.with_raw_response.submit( - model=config.model_name, - payload=json.loads(payload) if payload else {"prompt": prompt}, - priority=1, - ) - - # Getting raw response and parsing ourselves here due to Stainless limitation with - # Pydantic aliases not handled correctly (both fields are present in the model) - submit_response = QueueSubmitResponse.model_validate_json(raw_response.read()) - - click.echo("\N{CHECK MARK} Submitted job") - click.echo(submit_response.model_dump_json(indent=2)) - - if not watch or not submit_response.request_id: - return - - click.echo(f"\nWatching job {submit_response.request_id}...") - last_status: str | None = None - while True: - try: - response = client.queue.retrieve( - model=config.model_name, - request_id=submit_response.request_id, - ) - current_status = response.status - if current_status != last_status: - click.echo(response.model_dump_json(indent=2)) - last_status = current_status - - if current_status in ["done", "failed", "finished", "error", "canceled"]: - if current_status != "done": - ctx.exit(1) - break - - time.sleep(1) - - except KeyboardInterrupt: - click.echo(f"\nStopped watching {submit_response.request_id}") - ctx.exit(130) + if exit_code := Jig(ctx.obj, config_path).submit(prompt, payload, watch): + ctx.exit(exit_code) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options @click.option("--request-id", required=True, help="Job request ID") def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None: """Get status of a specific job""" - client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - - response = client.queue.retrieve( - model=config.model_name, - request_id=request_id, - ) - click.echo(response.model_dump_json(indent=2)) + Jig(ctx.obj, config_path).job_status(request_id) @click.command() -@click.pass_context -@handle_api_errors("Jig") -@config_option +@_jig_options def queue_status(ctx: click.Context, config_path: str | None) -> None: """Get queue metrics for the deployment""" - client: JigResource = ctx.obj.beta.jig - config = Config.find(config_path) - - response = client.queue.with_raw_response.metrics(model=config.model_name) - click.echo(json.dumps(response.json(), indent=2)) + Jig(ctx.obj, config_path).queue_status() @click.command("list") From 100b333bfce7a40ac63abc187f9d9ef16a0ba283 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 15:10:42 -0500 Subject: [PATCH 29/51] reimplement handle_api_errors for jig (16619 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 62 ++++++++++++++++-------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 3ae565b1..d3b2a9f7 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -16,6 +16,7 @@ from typing import TYPE_CHECKING, Any, Union, Callable from pathlib import Path from datetime import datetime +from functools import wraps from itertools import groupby from collections import defaultdict from dataclasses import field, asdict, dataclass, is_dataclass @@ -24,7 +25,7 @@ import click from together import Together -from together._exceptions import APIStatusError +from together._exceptions import APIError, APIStatusError from together.lib.cli.api._utils 
import handle_api_errors from together.types.beta.deployment import Deployment, ReplicaEvents from together.resources.beta.jig.jig import JigResource @@ -894,7 +895,7 @@ class Tracker: # replica_id -> when we started waiting for ready replica_wait_start: dict[str, float] = field(default_factory=lambda: defaultdict(time.time)) - def track_deployment_progress(self) -> dict[str, Any] | None: + def track_deployment_progress(self) -> None: """Track deployment progress until ready or failed. Polls deployment status every 3 seconds until: @@ -914,7 +915,7 @@ def track_deployment_progress(self) -> dict[str, Any] | None: if deployment.min_replicas == 0 and deployment.desired_replicas == 0: if str(deployment.status) == "ScaledToZero": click.echo("\N{CHECK MARK} Deployment scaled to zero replicas") - return None + return # Not yet scaled to zero, wait and retry time.sleep(self.poll_interval) continue @@ -938,7 +939,7 @@ def track_deployment_progress(self) -> dict[str, Any] | None: result = self.process_replica_event(replica_id=replica_id, event=event) if result == ReplicaTrackingResult.SUCCESS: - return None + return if result == ReplicaTrackingResult.FAILURE: raise SystemExit(1) @@ -1062,6 +1063,7 @@ def _image(self, tag: str = "latest") -> str: return f"{self.state.registry_base_path}/{self.config.model_name}:{tag}" def _image_with_digest(self, tag: str = "latest") -> str: + image_name = self._image(tag) if tag != "latest": return image_name try: @@ -1167,7 +1169,7 @@ def deploy( detach: bool = False, docker_args: str | None = None, existing_image: str | None = None, - ) -> dict[str, Any] | None: + ) -> None: self._ensure_registry() if existing_image: @@ -1179,7 +1181,7 @@ def deploy( if build_only: click.echo("\N{CHECK MARK} Build complete (--build-only)") - return None + return deploy_data = self._build_deploy_data(deployment_image) @@ -1209,14 +1211,15 @@ def deploy( raise if detach: - return response.model_dump() + click.echo(json.dumps(response.model_dump(), indent=2)) + return new_revision_id = _get_current_revision_id(response) scaling_up = was_scaled_to_zero and response.min_replicas and response.min_replicas > 0 if old_revision_id and old_revision_id == new_revision_id and not scaling_up: - return None + return - return Tracker(self.jig, self.config.model_name).track_deployment_progress() + Tracker(self.jig, self.config.model_name).track_deployment_progress() # == Query commands == @@ -1255,8 +1258,8 @@ def destroy(self) -> None: self.jig.destroy(self.config.model_name) click.echo(f"\N{WASTEBASKET} Destroyed {self.config.model_name}") - def submit(self, prompt: str | None, payload: str | None, watch: bool) -> int | None: - """Submit a job. 
Returns exit code if non-zero, else None.""" + def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: + """Submit a job and optionally watch for completion.""" if not prompt and not payload: raise click.UsageError("Either --prompt or --payload required") @@ -1273,7 +1276,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> int | click.echo(submit_response.model_dump_json(indent=2)) if not watch or not submit_response.request_id: - return None + return click.echo(f"\nWatching job {submit_response.request_id}...") last_status: str | None = None @@ -1289,13 +1292,15 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> int | last_status = current_status if current_status in ["done", "failed", "finished", "error", "canceled"]: - return 1 if current_status != "done" else None + if current_status != "done": + raise SystemExit(1) + return time.sleep(1) except KeyboardInterrupt: click.echo(f"\nStopped watching {submit_response.request_id}") - return 130 + raise SystemExit(130) from None def job_status(self, request_id: str) -> None: response = self.jig.queue.retrieve( @@ -1313,11 +1318,27 @@ def queue_status(self) -> None: def _jig_options(f: Callable[..., Any]) -> Any: - """Bundles @click.pass_context + @handle_api_errors("Jig") + @config_option.""" - f = config_option(f) - f = handle_api_errors("Jig")(f) - f = click.pass_context(f) - return f + """Bundles @click.pass_context + error handling + @config_option.""" + + @click.pass_context + @config_option + @wraps(f) + def wrapper(*args: Any, **kwargs: Any) -> None: + try: + f(*args, **kwargs) + except click.Abort: + raise SystemExit(0) from None + except click.ClickException: + raise + except APIError as e: + msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) + click.echo(msg, err=True) + raise SystemExit(1) from None + except Exception as e: + click.echo(str(e), err=True) + raise SystemExit(1) from None + + return wrapper @click.command() @@ -1447,8 +1468,7 @@ def destroy(ctx: click.Context, config_path: str | None) -> None: @click.option("--watch", is_flag=True, help="Watch job status until completion") def submit(ctx: click.Context, prompt: str | None, payload: str | None, watch: bool, config_path: str | None) -> None: """Submit a job to the deployment""" - if exit_code := Jig(ctx.obj, config_path).submit(prompt, payload, watch): - ctx.exit(exit_code) + Jig(ctx.obj, config_path).submit(prompt, payload, watch) @click.command() From 2a7999e3f7f860e08d73129971fcdeb0f98a4893 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 16:22:33 -0500 Subject: [PATCH 30/51] add refactor todos (16640 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index d3b2a9f7..aa9e662c 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -153,6 +153,7 @@ def validate(value: Any, value_type: type, path: str = "") -> str | None: return f"{path}: expected {value_type.__name__}, got {value!r}" # pyright: ignore return None +# TODO: make state a property of config @dataclass class Config: @@ -1039,6 +1040,7 @@ def _is_not_unique_error(e: APIStatusError) -> bool: msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore return "already exists" in msg +# TODO: merge Tracker into Jig class Jig: """Holds Together client, config, and state. 
Methods implement the core jig operations.""" From 9cfba6da7e75063b50fb905e92b1ebad09d947ee Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 16:27:30 -0500 Subject: [PATCH 31/51] UsageError for missing configs (16610 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index aa9e662c..6431988d 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -176,8 +176,7 @@ def find(cls, config_path: str | None = None, init: bool = False) -> Config: if config_path: found_path = Path(config_path) if not found_path.exists(): - click.echo(f"ERROR: Configuration file not found: {config_path}", err=True) - sys.exit(1) + raise click.UsageError(f"Configuration file not found: {config_path}") return cls.load(tomllib.loads(found_path.read_text()), found_path) if (jigfile := Path("jig.toml")).exists(): @@ -190,11 +189,7 @@ def find(cls, config_path: str | None = None, init: bool = False) -> Config: if init: return cls() - click.echo( - "ERROR: No pyproject.toml or jig.toml found, use --config to specify a config path.", - err=True, - ) - sys.exit(1) + raise click.UsageError("No pyproject.toml or jig.toml found, use --config to specify a config path.") @classmethod def load(cls, data: dict[str, Any], path: Path) -> Config: From 911f275dfa8971a436770d5fa666323e798321a8 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 18:03:39 -0500 Subject: [PATCH 32/51] simplify error wrapper (16597 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 6431988d..20d74fe7 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -1323,9 +1323,7 @@ def _jig_options(f: Callable[..., Any]) -> Any: def wrapper(*args: Any, **kwargs: Any) -> None: try: f(*args, **kwargs) - except click.Abort: - raise SystemExit(0) from None - except click.ClickException: + except (click.Abort, click.ClickException): raise except APIError as e: msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) From 5688a0d4c92589374ed3d18c8a0eb115df5315e7 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 18:28:07 -0500 Subject: [PATCH 33/51] fmt + fmt set_secret (16583 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 20d74fe7..e066fca8 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -153,8 +153,10 @@ def validate(value: Any, value_type: type, path: str = "") -> str | None: return f"{path}: expected {value_type.__name__}, got {value!r}" # pyright: ignore return None + # TODO: make state a property of config + @dataclass class Config: """Main configuration from jig.toml or pyproject.toml""" @@ -407,14 +409,7 @@ def _load_config_state(config_path: str | None) -> tuple[Config, State]: # == Secrets == -def _set_secret( - client: JigResource, - config: Config, - state: State, - name: str, - value: str, - description: str, -) -> None: +def _set_secret(client: JigResource, config: Config, state: State, name: str, value: str, description: str) -> None: """Set secret 
for the deployment""" deployment_secret_name = f"{config.model_name}-{name}" @@ -1035,8 +1030,10 @@ def _is_not_unique_error(e: APIStatusError) -> bool: msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore return "already exists" in msg + # TODO: merge Tracker into Jig + class Jig: """Holds Together client, config, and state. Methods implement the core jig operations.""" From 0f0364238543aa818675487a19c4ae684e33a198 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 18:43:08 -0500 Subject: [PATCH 34/51] tweak state/config loading (16529 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index e066fca8..8fc2edcb 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -197,8 +197,7 @@ def find(cls, config_path: str | None = None, init: bool = False) -> Config: def load(cls, data: dict[str, Any], path: Path) -> Config: """Load configuration from parsed TOML data""" # figure out config location and "Deployment name must be unique. Tip: update ..." message - is_pyproject = path.name.endswith("pyproject.toml") - if is_pyproject: + if path.name.endswith("pyproject.toml"): jig_config = data.get("tool", {}).get("jig", {}) if name := jig_config.get("name"): tip = "update `name` in your pyproject.toml" @@ -267,19 +266,16 @@ def load(cls, config_dir: Path, project_name: str) -> State: } """ - path = config_dir / ".jig.json" try: - all_data = json.loads(path.read_text()) - - # Check if this is the new nested structure (project_name as key) - if project_name in all_data and isinstance(all_data[project_name], dict): - # New structure: extract project-specific state - project_data = all_data[project_name] + all_data = json.loads((config_dir / ".jig.json").read_text()) + # is our project in the nested state format? 
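+            # e.g. all_data == {"<project_name>": {"secrets": {...}, "volumes": {...}}}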
+ if isinstance(project_data := all_data.get(project_name), dict): return cls.from_dict(config_dir, project_name, **project_data) - # Secrets or volumes exist, but not yet migrated (don't care about registry base path) + # top-level secrets/volumes project fields are set, but not migrated + # (don't care about registry base path) if "secrets" in all_data or "volumes" in all_data: return cls.from_dict(config_dir, project_name, **all_data) - # File exists but this project isn't in it yet + # state exists but our project isn't in it return cls(_config_dir=config_dir, _project_name=project_name) except FileNotFoundError: return cls(_config_dir=config_dir, _project_name=project_name) @@ -298,8 +294,7 @@ def save(self) -> None: all_data = {} # Update this project's state - project_data = {k: v for k, v in asdict(self).items() if not k.startswith("_")} - all_data[self._project_name] = project_data + all_data[self._project_name] = {k: v for k, v in asdict(self).items() if not k.startswith("_")} path.write_text(json.dumps(all_data, indent=2)) From 35f84ad1f16ee48ee644c979dd4c040ae2077c7b Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 19:47:43 -0500 Subject: [PATCH 35/51] Jig.jig -> Jig.api (16515 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 8fc2edcb..36da5b2b 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -1034,7 +1034,7 @@ class Jig: def __init__(self, client: Together, config_path: str | None = None) -> None: self.together = client - self.jig: JigResource = client.beta.jig + self.api: JigResource = client.beta.jig self.config = Config.find(config_path) self.state = State.load(self.config._path.parent, self.config.model_name) @@ -1141,7 +1141,7 @@ def _build_deploy_data(self, image: str) -> dict[str, Any]: if "TOGETHER_API_KEY" not in self.state.secrets: _set_secret( - self.jig, self.config, self.state, "TOGETHER_API_KEY", self.together.api_key, "Auth key for queue API" + self.api, self.config, self.state, "TOGETHER_API_KEY", self.together.api_key, "Auth key for queue API" ) for name, secret_id in self.state.secrets.items(): @@ -1179,10 +1179,10 @@ def deploy( click.echo(f"Deploying model: {self.config.model_name}") try: - existing = self.jig.retrieve(self.config.model_name) + existing = self.api.retrieve(self.config.model_name) old_revision_id = _get_current_revision_id(existing) was_scaled_to_zero = existing.ready_replicas == 0 - response = self.jig.update(self.config.model_name, **deploy_data) + response = self.api.update(self.config.model_name, **deploy_data) click.echo("\N{CHECK MARK} Applied new deployment configuration") except APIStatusError as e: if e.status_code != 404: @@ -1191,7 +1191,7 @@ def deploy( was_scaled_to_zero = False click.echo("\N{ROCKET} Creating new deployment") try: - response = self.jig.deploy(**deploy_data) + response = self.api.deploy(**deploy_data) click.echo(f"\N{CHECK MARK} Deployed: {self.config.model_name}") except APIStatusError as e: if _is_not_unique_error(e): @@ -1208,12 +1208,12 @@ def deploy( if old_revision_id and old_revision_id == new_revision_id and not scaling_up: return - Tracker(self.jig, self.config.model_name).track_deployment_progress() + Tracker(self.api, self.config.model_name).track_deployment_progress() # == Query commands == def status(self, json_output: bool = False) -> None: - 
response = self.jig.retrieve(self.config.model_name) + response = self.api.retrieve(self.config.model_name) if json_output: click.echo(response.model_dump_json(indent=2)) else: @@ -1225,7 +1225,7 @@ def endpoint(self) -> None: def logs(self, follow: bool = False) -> None: if not follow: - if lines := self.jig.retrieve_logs(self.config.model_name).lines: + if lines := self.api.retrieve_logs(self.config.model_name).lines: for line in lines: click.echo(line) else: @@ -1233,7 +1233,7 @@ def logs(self, follow: bool = False) -> None: return try: - with self.jig.with_streaming_response.retrieve_logs(self.config.model_name) as stream: + with self.api.with_streaming_response.retrieve_logs(self.config.model_name) as stream: for line in stream.iter_lines(): if line: for log_line in json.loads(line).get("lines", []): @@ -1244,7 +1244,7 @@ def logs(self, follow: bool = False) -> None: click.echo(f"\nConnection ended: {e}") def destroy(self) -> None: - self.jig.destroy(self.config.model_name) + self.api.destroy(self.config.model_name) click.echo(f"\N{WASTEBASKET} Destroyed {self.config.model_name}") def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: @@ -1252,7 +1252,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: if not prompt and not payload: raise click.UsageError("Either --prompt or --payload required") - raw_response = self.jig.queue.with_raw_response.submit( + raw_response = self.api.queue.with_raw_response.submit( model=self.config.model_name, payload=json.loads(payload) if payload else {"prompt": prompt}, priority=1, @@ -1271,7 +1271,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: last_status: str | None = None while True: try: - response = self.jig.queue.retrieve( + response = self.api.queue.retrieve( model=self.config.model_name, request_id=submit_response.request_id, ) @@ -1292,14 +1292,14 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: raise SystemExit(130) from None def job_status(self, request_id: str) -> None: - response = self.jig.queue.retrieve( + response = self.api.queue.retrieve( model=self.config.model_name, request_id=request_id, ) click.echo(response.model_dump_json(indent=2)) def queue_status(self) -> None: - response = self.jig.queue.with_raw_response.metrics(model=self.config.model_name) + response = self.api.queue.with_raw_response.metrics(model=self.config.model_name) click.echo(json.dumps(response.json(), indent=2)) From 3e14116e2e49b782498f84584feddad8a787fbcb Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 20:02:41 -0500 Subject: [PATCH 36/51] refactor _jig_options into _pass_jig and _print_errors (16198 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 246 ++++++++++------------- 1 file changed, 111 insertions(+), 135 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 36da5b2b..4548ddba 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -389,38 +389,57 @@ def format_deployment_status(d: Deployment) -> str: return status -# == Shared CLI helpers == - -config_option = click.option("-c", "--config", "config_path", default=None, help="Configuration file path") - - -def _load_config_state(config_path: str | None) -> tuple[Config, State]: - """Load config and state from config_path — shared by commands needing both.""" - config = Config.find(config_path) - return config, State.load(config._path.parent, 
config.model_name) - - # = Secrets and Volumes subcommands = # == Secrets == -def _set_secret(client: JigResource, config: Config, state: State, name: str, value: str, description: str) -> None: +def _set_secret(jig: Jig, name: str, value: str, description: str) -> None: """Set secret for the deployment""" - deployment_secret_name = f"{config.model_name}-{name}" + scoped_name = f"{jig.config.model_name}-{name}" try: - client.secrets.retrieve(deployment_secret_name) - client.secrets.update(deployment_secret_name, name=deployment_secret_name, description=description, value=value) + jig.api.secrets.retrieve(scoped_name) + jig.api.secrets.update(id=scoped_name, name=scoped_name, description=description, value=value) click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") except APIStatusError as e: if e.status_code != 404: raise click.echo("\N{ROCKET} Creating new secret") - client.secrets.create(name=deployment_secret_name, value=value, description=description) + jig.api.secrets.create(name=scoped_name, value=value, description=description) click.echo(f"\N{CHECK MARK} Created secret: {name}") - state.secrets[name] = deployment_secret_name - state.save() + jig.state.secrets[name] = scoped_name + jig.state.save() + + +# should this have the same prefix behavior as handle_api_errors? +def _print_errors(f: Callable[..., Any]) -> Any: + @wraps(f) + def wrapper(*args: Any, **kwargs: Any) -> None: + try: + f(*args, **kwargs) + except (click.Abort, click.ClickException): + raise + except APIError as e: + msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) + click.echo(msg, err=True) + raise SystemExit(1) from None + except Exception as e: + click.echo(str(e), err=True) + raise SystemExit(1) from None + + return wrapper + + +def _pass_jig(f: Callable[..., Any]) -> Any: + @click.pass_context + @click.option("-c", "--config", "config_path", default=None, help="Configuration file path") + @_print_errors + @wraps(f) + def wrapper(ctx: click.Context, config_path: str | None, *args: Any, **kwargs: Any) -> None: + f(Jig(ctx.obj, config_path), *args, **kwargs) + + return wrapper @click.group() @@ -431,71 +450,50 @@ def secrets(ctx: click.Context) -> None: @secrets.command("set") -@click.pass_context +@_pass_jig +@_print_errors @click.option("--name", required=True, help="Secret name") @click.option("--value", required=True, help="Secret value") @click.option("--description", default="", help="Secret description") -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_set( - ctx: click.Context, - name: str, - value: str, - description: str, - config_path: str | None, -) -> None: +def secrets_set(jig: Jig, name: str, value: str, description: str) -> None: """Set a secret (create or update)""" - config, state = _load_config_state(config_path) - _set_secret(ctx.obj.beta.jig, config, state, name, value, description) + _set_secret(jig, name, value, description) @secrets.command("unset") -@click.pass_context +@_pass_jig +@_print_errors @click.option("--name", required=True, help="Secret name to remove") -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_unset( - ctx: click.Context, # noqa: ARG001 - name: str, - config_path: str | None, -) -> None: +def secrets_unset(jig: Jig, name: str) -> None: """Remove a secret from both remote and local state""" - _, state = _load_config_state(config_path) - - if 
state.secrets.pop(name, ""): - state.save() + try: + del jig.state.secrets[name] + jig.state.save() click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state") - else: + except KeyError: click.echo(f"\N{CROSS MARK} Secret '{name}' is not set") @secrets.command("list") -@click.pass_context -@click.option("-c", "--config", "config_path", default=None, help="Configuration file path") -@handle_api_errors("Secrets") -def secrets_list( - ctx: click.Context, - config_path: str | None, -) -> None: +@_pass_jig +@_print_errors +def secrets_list(jig: Jig) -> None: """List all secrets with sync status""" - client: JigResource = ctx.obj.beta.jig - config, state = _load_config_state(config_path) + prefix = f"{jig.config.model_name}-" - prefix = f"{config.model_name}-" - - local_secrets = set(state.secrets.keys()) + local_secrets = set(jig.state.secrets.keys()) remote_secrets: set[str] = set() # Get all remote secrets then filter for this deployment - for secret in client.secrets.list().data or []: + for secret in jig.api.secrets.list().data or []: if (name := secret.name) and name.startswith(prefix): # Strip prefix to get local name remote_secrets.add(name.removeprefix(prefix)) if not local_secrets and not remote_secrets: - click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{config.model_name}'") + click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{jig.config.model_name}'") return - click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{config.model_name}':") + click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{jig.config.model_name}':") click.echo() for name in sorted(local_secrets | remote_secrets): @@ -587,7 +585,7 @@ def volumes(ctx: click.Context) -> None: @click.pass_context @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="Source directory path") -@handle_api_errors("Volumes") +@handle_api_errors("Volumes") # fixme def volumes_create(ctx: click.Context, name: str, source: str) -> None: """Create a volume and upload files""" client: JigResource = ctx.obj.beta.jig @@ -598,7 +596,7 @@ def volumes_create(ctx: click.Context, name: str, source: str) -> None: @click.pass_context @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="New source directory path") -@handle_api_errors("Volumes") +@handle_api_errors("Volumes") # fixme def volumes_update(ctx: click.Context, name: str, source: str) -> None: """Update a volume and re-upload files""" client: JigResource = ctx.obj.beta.jig @@ -608,7 +606,7 @@ def volumes_update(ctx: click.Context, name: str, source: str) -> None: @volumes.command("delete") @click.pass_context @click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") +@handle_api_errors("Volumes") # fixme def volumes_delete(ctx: click.Context, name: str) -> None: """Delete a volume""" client: JigResource = ctx.obj.beta.jig @@ -625,7 +623,7 @@ def volumes_delete(ctx: click.Context, name: str) -> None: @volumes.command("describe") @click.pass_context @click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") +@handle_api_errors("Volumes") # fixme def volumes_describe( ctx: click.Context, name: str, @@ -644,7 +642,7 @@ def volumes_describe( @volumes.command("list") @click.pass_context -@handle_api_errors("Volumes") +@handle_api_errors("Volumes") # fixme def volumes_list(ctx: click.Context) -> None: """List all volumes""" client: JigResource = ctx.obj.beta.jig @@ 
-1022,7 +1020,7 @@ def _is_not_unique_error(e: APIStatusError) -> bool: # "failed to delete secret" ("Failed to delete secret metadata from database" in logs) # "failed to delete deployment from kubernetes: %w" # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes - msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore + msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore return "already exists" in msg @@ -1140,9 +1138,7 @@ def _build_deploy_data(self, image: str) -> dict[str, Any]: env_vars = [{"name": k, "value": v} for k, v in self.config.deploy.environment_variables.items()] if "TOGETHER_API_KEY" not in self.state.secrets: - _set_secret( - self.api, self.config, self.state, "TOGETHER_API_KEY", self.together.api_key, "Auth key for queue API" - ) + _set_secret(self, "TOGETHER_API_KEY", self.together.api_key, "Auth key for queue API") for name, secret_id in self.state.secrets.items(): env_vars.append({"name": name, "value_from_secret": secret_id}) @@ -1292,10 +1288,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: raise SystemExit(130) from None def job_status(self, request_id: str) -> None: - response = self.api.queue.retrieve( - model=self.config.model_name, - request_id=request_id, - ) + response = self.api.queue.retrieve(model=self.config.model_name, request_id=request_id) click.echo(response.model_dump_json(indent=2)) def queue_status(self) -> None: @@ -1306,28 +1299,6 @@ def queue_status(self) -> None: # --- CLI Commands --- -def _jig_options(f: Callable[..., Any]) -> Any: - """Bundles @click.pass_context + error handling + @config_option.""" - - @click.pass_context - @config_option - @wraps(f) - def wrapper(*args: Any, **kwargs: Any) -> None: - try: - f(*args, **kwargs) - except (click.Abort, click.ClickException): - raise - except APIError as e: - msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) - click.echo(msg, err=True) - raise SystemExit(1) from None - except Exception as e: - click.echo(str(e), err=True) - raise SystemExit(1) from None - - return wrapper - - @click.command() def init() -> None: """Initialize jig configuration""" @@ -1363,41 +1334,40 @@ def init() -> None: @click.command() -@config_option -@handle_api_errors("Jig") -def dockerfile(config_path: str | None) -> None: +@_pass_jig +@_print_errors +def dockerfile(jig: Jig) -> None: """Generate Dockerfile""" - config = Config.find(config_path) - if _dockerfile(config): + if _dockerfile(jig.config): click.echo("\N{CHECK MARK} Generated Dockerfile") else: - click.echo( - f"ERROR: {config.dockerfile} exists and is not managed by jig. " - f"Remove or rename the file to allow jig to manage dockerfile.", - err=True, - ) + msg = f"ERROR: {jig.config.dockerfile} exists and is not managed by jig. Remove or rename the file to allow jig to manage dockerfile." 
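+        # _dockerfile() returns False when the file exists but is not managed by jig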
+ click.echo(msg, err=True) @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--tag", default="latest", help="Image tag") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") @click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") -def build(ctx: click.Context, tag: str, warmup: bool, docker_args: str | None, config_path: str | None) -> None: +def build(jig: Jig, tag: str, warmup: bool, docker_args: str | None) -> None: """Build container image""" - Jig(ctx.obj, config_path).build(tag, warmup, docker_args) + jig.build(tag, warmup, docker_args) @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--tag", default="latest", help="Image tag") -def push(ctx: click.Context, tag: str, config_path: str | None) -> None: +def push(jig: Jig, tag: str) -> None: """Push image to registry""" - Jig(ctx.obj, config_path).push(tag) + jig.push(tag) @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--tag", default="latest", help="Image tag") @click.option("--build-only", is_flag=True, help="Build and push only") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") @@ -1405,76 +1375,82 @@ def push(ctx: click.Context, tag: str, config_path: str | None) -> None: @click.option("--image", "existing_image", default=None, help="Use existing image (skip build/push)") @click.option("--detach", "detach", is_flag=True, help="Do not wait for deployment to complete") def deploy( - ctx: click.Context, + jig: Jig, tag: str, build_only: bool, warmup: bool, detach: bool, docker_args: str | None, existing_image: str | None, - config_path: str | None, ) -> None: """Deploy model""" - Jig(ctx.obj, config_path).deploy(tag, build_only, warmup, detach, docker_args, existing_image) + jig.deploy(tag, build_only, warmup, detach, docker_args, existing_image) @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") -def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: +def status(jig: Jig, json_output: bool = False) -> None: """Get deployment status""" - Jig(ctx.obj, config_path).status(json_output) + jig.status(json_output) @click.command() -@_jig_options -def endpoint(ctx: click.Context, config_path: str | None) -> None: +@_pass_jig +@_print_errors +def endpoint(jig: Jig) -> None: """Get deployment endpoint URL""" - Jig(ctx.obj, config_path).endpoint() + jig.endpoint() @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--follow", is_flag=True, help="Follow log output") -def logs(ctx: click.Context, follow: bool, config_path: str | None) -> None: +def logs(jig: Jig, follow: bool) -> None: """Get deployment logs""" - Jig(ctx.obj, config_path).logs(follow) + jig.logs(follow) @click.command() -@_jig_options -def destroy(ctx: click.Context, config_path: str | None) -> None: +@_pass_jig +@_print_errors +def destroy(jig: Jig) -> None: """Destroy deployment""" - Jig(ctx.obj, config_path).destroy() + jig.destroy() @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--prompt", default=None, help="Job prompt") @click.option("--payload", default=None, help="Job payload JSON") @click.option("--watch", is_flag=True, help="Watch job status until completion") -def submit(ctx: click.Context, prompt: str | None, payload: str | None, watch: bool, config_path: str | None) -> None: +def 
submit(jig: Jig, prompt: str | None, payload: str | None, watch: bool) -> None: """Submit a job to the deployment""" - Jig(ctx.obj, config_path).submit(prompt, payload, watch) + jig.submit(prompt, payload, watch) @click.command() -@_jig_options +@_pass_jig +@_print_errors @click.option("--request-id", required=True, help="Job request ID") -def job_status(ctx: click.Context, request_id: str, config_path: str | None) -> None: +def job_status(jig: Jig, request_id: str) -> None: """Get status of a specific job""" - Jig(ctx.obj, config_path).job_status(request_id) + jig.job_status(request_id) @click.command() -@_jig_options -def queue_status(ctx: click.Context, config_path: str | None) -> None: +@_pass_jig +@_print_errors +def queue_status(jig: Jig) -> None: """Get queue metrics for the deployment""" - Jig(ctx.obj, config_path).queue_status() + jig.queue_status() @click.command("list") -@handle_api_errors("Jig") +@handle_api_errors("Jig") # fixme @click.pass_context def list_deployments(ctx: click.Context) -> None: """List all deployments""" From bf8b282b9cb3f3b0706b96e499bf3443ec54ffc5 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 20:12:26 -0500 Subject: [PATCH 37/51] move trivial commands out of jig class (16089 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 42 ++++++++---------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 4548ddba..28fafbcf 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -1020,7 +1020,7 @@ def _is_not_unique_error(e: APIStatusError) -> bool: # "failed to delete secret" ("Failed to delete secret metadata from database" in logs) # "failed to delete deployment from kubernetes: %w" # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes - msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore + msg = e.body.get("error", "") if isinstance(e.body, dict) else "" # type: ignore return "already exists" in msg @@ -1208,17 +1208,6 @@ def deploy( # == Query commands == - def status(self, json_output: bool = False) -> None: - response = self.api.retrieve(self.config.model_name) - if json_output: - click.echo(response.model_dump_json(indent=2)) - else: - click.echo(format_deployment_status(response)) - - def endpoint(self) -> None: - base = _get_api_base_url(self.together) - click.echo(f"{base}/v1/deployment-request/{self.config.model_name}") - def logs(self, follow: bool = False) -> None: if not follow: if lines := self.api.retrieve_logs(self.config.model_name).lines: @@ -1239,10 +1228,6 @@ def logs(self, follow: bool = False) -> None: except Exception as e: click.echo(f"\nConnection ended: {e}") - def destroy(self) -> None: - self.api.destroy(self.config.model_name) - click.echo(f"\N{WASTEBASKET} Destroyed {self.config.model_name}") - def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: """Submit a job and optionally watch for completion.""" if not prompt and not payload: @@ -1287,14 +1272,6 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: click.echo(f"\nStopped watching {submit_response.request_id}") raise SystemExit(130) from None - def job_status(self, request_id: str) -> None: - response = self.api.queue.retrieve(model=self.config.model_name, request_id=request_id) - click.echo(response.model_dump_json(indent=2)) - - def queue_status(self) -> 
None: - response = self.api.queue.with_raw_response.metrics(model=self.config.model_name) - click.echo(json.dumps(response.json(), indent=2)) - # --- CLI Commands --- @@ -1393,7 +1370,11 @@ def deploy( @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") def status(jig: Jig, json_output: bool = False) -> None: """Get deployment status""" - jig.status(json_output) + response = jig.api.retrieve(jig.config.model_name) + if json_output: + click.echo(response.model_dump_json(indent=2)) + else: + click.echo(format_deployment_status(response)) @click.command() @@ -1401,7 +1382,7 @@ def status(jig: Jig, json_output: bool = False) -> None: @_print_errors def endpoint(jig: Jig) -> None: """Get deployment endpoint URL""" - jig.endpoint() + click.echo(f"{_get_api_base_url(jig.together)}/v1/deployment-request/{jig.config.model_name}") @click.command() @@ -1418,7 +1399,8 @@ def logs(jig: Jig, follow: bool) -> None: @_print_errors def destroy(jig: Jig) -> None: """Destroy deployment""" - jig.destroy() + jig.api.destroy(jig.config.model_name) + click.echo(f"\N{WASTEBASKET} Destroyed {jig.config.model_name}") @click.command() @@ -1438,7 +1420,8 @@ def submit(jig: Jig, prompt: str | None, payload: str | None, watch: bool) -> No @click.option("--request-id", required=True, help="Job request ID") def job_status(jig: Jig, request_id: str) -> None: """Get status of a specific job""" - jig.job_status(request_id) + response = jig.api.queue.retrieve(model=jig.config.model_name, request_id=request_id) + click.echo(response.model_dump_json(indent=2)) @click.command() @@ -1446,7 +1429,8 @@ def job_status(jig: Jig, request_id: str) -> None: @_print_errors def queue_status(jig: Jig) -> None: """Get queue metrics for the deployment""" - jig.queue_status() + response = jig.api.queue.with_raw_response.metrics(model=jig.config.model_name) + click.echo(json.dumps(response.json(), indent=2)) @click.command("list") From c4691cb19d518021688b338dd0276a3211176079 Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 20:57:06 -0500 Subject: [PATCH 38/51] SystemExit -> click.exceptions.Exit, use click.BadParameter and click.UsageError more --- src/together/lib/cli/api/beta/jig/jig.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 28fafbcf..f6481cd5 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -23,6 +23,7 @@ from urllib.parse import urlparse import click +from click.exceptions import Exit from together import Together from together._exceptions import APIError, APIStatusError @@ -423,10 +424,10 @@ def wrapper(*args: Any, **kwargs: Any) -> None: except APIError as e: msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) click.echo(msg, err=True) - raise SystemExit(1) from None + raise Exit(1) from None except Exception as e: click.echo(str(e), err=True) - raise SystemExit(1) from None + raise Exit(1) from None return wrapper @@ -516,9 +517,9 @@ def secrets_list(jig: Jig) -> None: def _validate_source(p: Path) -> None: if not p.exists(): - raise ValueError(f"Source path does not exist: {p}") + raise click.BadParameter(f"Source path does not exist: {p}") if not p.is_dir(): - raise ValueError(f"Source path must be a directory: {p}") + raise click.BadParameter(f"Source path must be a directory: {p}") async def _create_volume(client: JigResource, name: str, source: str) -> None: @@ 
-736,14 +737,14 @@ def _get_files_to_copy(config: Config) -> list[str]: if config.image.auto_include_git: try: if _run(["git", "status", "--porcelain"]).stdout.strip(): - raise RuntimeError("Git repository has uncommitted changes: auto_include_git not allowed.") + raise click.UsageError("Git repository has uncommitted changes: auto_include_git not allowed.") git_files = _run(["git", "ls-files"]).stdout.strip().split("\n") files.update(f for f in git_files if f and f != ".") except subprocess.CalledProcessError: pass if "." in files: - raise ValueError("Copying '.' is not allowed. Please enumerate specific files.") + raise click.UsageError("Copying '.' is not allowed. Please enumerate specific files.") return sorted(files) @@ -925,7 +926,7 @@ def track_deployment_progress(self) -> None: if result == ReplicaTrackingResult.SUCCESS: return if result == ReplicaTrackingResult.FAILURE: - raise SystemExit(1) + raise Exit(1) time.sleep(self.poll_interval) @@ -933,13 +934,13 @@ def track_deployment_progress(self) -> None: click.echo("\N{CROSS MARK} Deployment tracking timed out after 10 minutes") click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") click.echo("Run 'jig status' to check current state.") - raise SystemExit(1) + raise Exit(1) except KeyboardInterrupt: click.echo("\n\N{WARNING SIGN} Deployment tracking interrupted") click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") click.echo("Run 'jig status' to check current state.") - raise SystemExit(130) from None + raise Exit(130) from None def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> ReplicaTrackingResult: """Process a single replica event and return the tracking result.""" @@ -1263,14 +1264,14 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: if current_status in ["done", "failed", "finished", "error", "canceled"]: if current_status != "done": - raise SystemExit(1) + raise Exit(1) return time.sleep(1) except KeyboardInterrupt: click.echo(f"\nStopped watching {submit_response.request_id}") - raise SystemExit(130) from None + raise Exit(130) from None # --- CLI Commands --- From df0398b341c7b2f8fa55a0d42dca8ef26f624f7b Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 21:10:13 -0500 Subject: [PATCH 39/51] use same prefix pattern as handle_api_errors and add JigError --- src/together/lib/cli/api/beta/jig/jig.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index f6481cd5..d0dd2530 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -413,7 +413,16 @@ def _set_secret(jig: Jig, name: str, value: str, description: str) -> None: jig.state.save() -# should this have the same prefix behavior as handle_api_errors? 
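+# expected failures raise JigError with a user-facing message, e.g.
+# raise JigError("Registry login failed"); _print_errors reports them
+# via jig_fail and exits 1 instead of dumping a traceback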
+class JigError(Exception): + """Actionable runtime error""" + + +def jig_fail(msg: str) -> None: + prefix = click.style("Jig: ", fg="blue") + click.echo(prefix + click.style("Failed", fg="red"), err=True) + click.echo(prefix + click.style(msg, fg="red"), err=True) + + def _print_errors(f: Callable[..., Any]) -> Any: @wraps(f) def wrapper(*args: Any, **kwargs: Any) -> None: @@ -423,11 +432,16 @@ def wrapper(*args: Any, **kwargs: Any) -> None: raise except APIError as e: msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) - click.echo(msg, err=True) + fail(msg) raise Exit(1) from None - except Exception as e: - click.echo(str(e), err=True) + except JigError as e: + fail(str(e)) raise Exit(1) from None + except Exception as e: + if DEBUG: + raise + fail(f"Unexpected error: {e}") + raise click.exceptions.Exit(1) from None return wrapper @@ -435,7 +449,6 @@ def wrapper(*args: Any, **kwargs: Any) -> None: def _pass_jig(f: Callable[..., Any]) -> Any: @click.pass_context @click.option("-c", "--config", "config_path", default=None, help="Configuration file path") - @_print_errors @wraps(f) def wrapper(ctx: click.Context, config_path: str | None, *args: Any, **kwargs: Any) -> None: f(Jig(ctx.obj, config_path), *args, **kwargs) From c96306e852c1fe1d8d28d565c5fa2e80f18bbefa Mon Sep 17 00:00:00 2001 From: technillogue Date: Wed, 18 Feb 2026 21:26:08 -0500 Subject: [PATCH 40/51] fix print_errors --- src/together/lib/cli/api/beta/jig/jig.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index d0dd2530..d36b71f3 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -428,20 +428,20 @@ def _print_errors(f: Callable[..., Any]) -> Any: def wrapper(*args: Any, **kwargs: Any) -> None: try: f(*args, **kwargs) - except (click.Abort, click.ClickException): + except (Exit, click.Abort, click.ClickException): raise except APIError as e: msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) - fail(msg) + jig_fail(msg) raise Exit(1) from None except JigError as e: - fail(str(e)) + jig_fail(str(e)) raise Exit(1) from None except Exception as e: if DEBUG: raise - fail(f"Unexpected error: {e}") - raise click.exceptions.Exit(1) from None + jig_fail(f"Unexpected error: {e}") + raise Exit(1) from None return wrapper From dd1a270879fdea07ba3a45f3a29d63056ad7758b Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 00:21:09 -0500 Subject: [PATCH 41/51] use JigError class as appropriate --- src/together/lib/cli/api/beta/jig/jig.py | 45 +++++++++++------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index d36b71f3..d7f5596c 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -27,7 +27,6 @@ from together import Together from together._exceptions import APIError, APIStatusError -from together.lib.cli.api._utils import handle_api_errors from together.types.beta.deployment import Deployment, ReplicaEvents from together.resources.beta.jig.jig import JigResource from together.lib.cli.api.beta.jig._uploader import Uploader @@ -550,7 +549,7 @@ async def _create_volume(client: JigResource, name: str, source: str) -> None: ) click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") except Exception as e: - raise RuntimeError(f"Failed to create 
volume: {e}") from e + raise JigError(f"Failed to create volume: {e}") from e try: await Uploader(client._client).upload_files(source_path, volume_name=name) @@ -561,7 +560,7 @@ async def _create_volume(client: JigResource, name: str, source: str) -> None: client.volumes.delete(name) except Exception as cleanup_error: click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") - raise + raise Exit(1) from None async def _update_volume(client: JigResource, name: str, source: str) -> None: @@ -572,7 +571,7 @@ async def _update_volume(client: JigResource, name: str, source: str) -> None: client.volumes.retrieve(name) except APIStatusError as e: if e.status_code == 404: - raise ValueError(f"Volume '{name}' does not exist") from e + raise JigError(f"Volume '{name}' does not exist") from e raise source_prefix = f"{name}/{source_path.name}" @@ -599,7 +598,7 @@ def volumes(ctx: click.Context) -> None: @click.pass_context @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="Source directory path") -@handle_api_errors("Volumes") # fixme +@_print_errors def volumes_create(ctx: click.Context, name: str, source: str) -> None: """Create a volume and upload files""" client: JigResource = ctx.obj.beta.jig @@ -610,7 +609,7 @@ def volumes_create(ctx: click.Context, name: str, source: str) -> None: @click.pass_context @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="New source directory path") -@handle_api_errors("Volumes") # fixme +@_print_errors def volumes_update(ctx: click.Context, name: str, source: str) -> None: """Update a volume and re-upload files""" client: JigResource = ctx.obj.beta.jig @@ -620,7 +619,7 @@ def volumes_update(ctx: click.Context, name: str, source: str) -> None: @volumes.command("delete") @click.pass_context @click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") # fixme +@_print_errors def volumes_delete(ctx: click.Context, name: str) -> None: """Delete a volume""" client: JigResource = ctx.obj.beta.jig @@ -637,7 +636,7 @@ def volumes_delete(ctx: click.Context, name: str) -> None: @volumes.command("describe") @click.pass_context @click.option("--name", required=True, help="Volume name") -@handle_api_errors("Volumes") # fixme +@_print_errors def volumes_describe( ctx: click.Context, name: str, @@ -656,7 +655,7 @@ def volumes_describe( @volumes.command("list") @click.pass_context -@handle_api_errors("Volumes") # fixme +@_print_errors def volumes_list(ctx: click.Context) -> None: """List all volumes""" client: JigResource = ctx.obj.beta.jig @@ -821,12 +820,12 @@ def _build_warm_image(base_image: str) -> None: click.echo(f"Running: {' '.join(cmd)}") result = subprocess.run(cmd) if result.returncode != 0: - raise RuntimeError(f"Warmup failed with code {result.returncode}") + raise JigError(f"Warmup failed with code {result.returncode}") # Check cache was generated cache_files = list(cache_dir.rglob("*")) if not cache_files: - raise RuntimeError("Warmup completed but no cache files were generated") + raise JigError("Warmup completed but no cache files were generated") click.echo(f"\N{CHECK MARK} Warmup complete, {len(cache_files)} cache files generated") @@ -839,7 +838,7 @@ def _build_warm_image(base_image: str) -> None: final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image, "-f", "-", "."] if _run(final_cmd, input=final_dockerfile).returncode != 0: - raise RuntimeError("Cache image build failed") + raise 
JigError("Cache image build failed") click.echo("\N{CHECK MARK} Final image with cache built") @@ -1054,7 +1053,8 @@ def _ensure_registry(self) -> None: """Ensure registry base path is set in state""" if not self.state.registry_base_path: response = self.together._client.get("/image-repositories/base-path", headers=self.together.auth_headers) - response.raise_for_status() + if not response.is_success: + raise JigError(f"Failed to get registry path (HTTP {response.status_code})") data = response.json() # Strip protocol prefix - Docker tags don't support URLs self.state.registry_base_path = data["base-path"].removeprefix("http://").removeprefix("https://") @@ -1076,10 +1076,8 @@ def _image_with_digest(self, tag: str = "latest") -> str: return str(digest) except subprocess.CalledProcessError as e: msg = e.stderr.strip() if e.stderr else "Docker command failed" - raise RuntimeError(f"Failed to get digest for {image_name}: {msg}") from e - raise RuntimeError( - f"No registry digest found for {image_name}. Make sure the image was pushed to registry first." - ) + raise JigError(f"Failed to get digest for {image_name}: {msg}") from e + raise JigError(f"No registry digest found for {image_name}. Make sure the image was pushed to registry first.") # == Build / Push / Deploy == @@ -1101,7 +1099,7 @@ def build(self, tag: str = "latest", warmup: bool = False, docker_args: str | No if extra_args: cmd.extend(shlex.split(extra_args)) if subprocess.run(cmd).returncode != 0: - raise RuntimeError("Build failed") + raise JigError("Build failed") click.echo("\N{CHECK MARK} Built") @@ -1115,11 +1113,11 @@ def push(self, tag: str = "latest") -> None: registry = self.state.registry_base_path.split("/")[0] login_cmd = ["docker", "login", registry, "--username", "user", "--password-stdin"] if _run(login_cmd, input=self.together.api_key).returncode != 0: - raise RuntimeError("Registry login failed") + raise JigError("Registry login failed") click.echo(f"Pushing {image}") if subprocess.run(["docker", "push", image]).returncode != 0: - raise RuntimeError("Push failed") + raise JigError("Push failed") click.echo("\N{CHECK MARK} Pushed") def _build_deploy_data(self, image: str) -> dict[str, Any]: @@ -1205,7 +1203,7 @@ def deploy( click.echo(f"\N{CHECK MARK} Deployed: {self.config.model_name}") except APIStatusError as e: if _is_not_unique_error(e): - raise RuntimeError(f"Deployment name must be unique. Tip: {self.config._unique_name_tip}") from None + raise JigError(f"Deployment name must be unique. 
Tip: {self.config._unique_name_tip}") from None # TODO: helpful tips for more error cases raise @@ -1448,10 +1446,9 @@ def queue_status(jig: Jig) -> None: @click.command("list") -@handle_api_errors("Jig") # fixme +@_print_errors @click.pass_context def list_deployments(ctx: click.Context) -> None: """List all deployments""" - client: JigResource = ctx.obj.beta.jig - response = client.with_raw_response.list() + response = ctx.obj.beta.jig.with_raw_response.list() click.echo(json.dumps(response.json(), indent=2)) From 6c0a098067962fc60565477ed40c01991612bc42 Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 00:38:04 -0500 Subject: [PATCH 42/51] rename _pass_jig to _jig_command and add error handling --- src/together/lib/cli/api/beta/jig/jig.py | 64 +++++++++--------------- 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index d7f5596c..6a20db19 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -422,7 +422,7 @@ def jig_fail(msg: str) -> None: click.echo(prefix + click.style(msg, fg="red"), err=True) -def _print_errors(f: Callable[..., Any]) -> Any: +def _handle_jig_errors(f: Callable[..., Any]) -> Any: @wraps(f) def wrapper(*args: Any, **kwargs: Any) -> None: try: @@ -445,7 +445,8 @@ def wrapper(*args: Any, **kwargs: Any) -> None: return wrapper -def _pass_jig(f: Callable[..., Any]) -> Any: +def _jig_command(f: Callable[..., Any]) -> Any: + @_handle_jig_errors @click.pass_context @click.option("-c", "--config", "config_path", default=None, help="Configuration file path") @wraps(f) @@ -463,8 +464,7 @@ def secrets(ctx: click.Context) -> None: @secrets.command("set") -@_pass_jig -@_print_errors +@_jig_command @click.option("--name", required=True, help="Secret name") @click.option("--value", required=True, help="Secret value") @click.option("--description", default="", help="Secret description") @@ -474,8 +474,7 @@ def secrets_set(jig: Jig, name: str, value: str, description: str) -> None: @secrets.command("unset") -@_pass_jig -@_print_errors +@_jig_command @click.option("--name", required=True, help="Secret name to remove") def secrets_unset(jig: Jig, name: str) -> None: """Remove a secret from both remote and local state""" @@ -488,8 +487,7 @@ def secrets_unset(jig: Jig, name: str) -> None: @secrets.command("list") -@_pass_jig -@_print_errors +@_jig_command def secrets_list(jig: Jig) -> None: """List all secrets with sync status""" prefix = f"{jig.config.model_name}-" @@ -598,7 +596,7 @@ def volumes(ctx: click.Context) -> None: @click.pass_context @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="Source directory path") -@_print_errors +@_handle_jig_errors def volumes_create(ctx: click.Context, name: str, source: str) -> None: """Create a volume and upload files""" client: JigResource = ctx.obj.beta.jig @@ -609,7 +607,7 @@ def volumes_create(ctx: click.Context, name: str, source: str) -> None: @click.pass_context @click.option("--name", required=True, help="Volume name") @click.option("--source", required=True, help="New source directory path") -@_print_errors +@_handle_jig_errors def volumes_update(ctx: click.Context, name: str, source: str) -> None: """Update a volume and re-upload files""" client: JigResource = ctx.obj.beta.jig @@ -619,7 +617,7 @@ def volumes_update(ctx: click.Context, name: str, source: str) -> None: @volumes.command("delete") @click.pass_context 
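+# volume commands keep ctx: they operate on named volumes and never load a config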
@click.option("--name", required=True, help="Volume name") -@_print_errors +@_handle_jig_errors def volumes_delete(ctx: click.Context, name: str) -> None: """Delete a volume""" client: JigResource = ctx.obj.beta.jig @@ -636,11 +634,8 @@ def volumes_delete(ctx: click.Context, name: str) -> None: @volumes.command("describe") @click.pass_context @click.option("--name", required=True, help="Volume name") -@_print_errors -def volumes_describe( - ctx: click.Context, - name: str, -) -> None: +@_handle_jig_errors +def volumes_describe(ctx: click.Context, name: str) -> None: """Describe a volume""" client: JigResource = ctx.obj.beta.jig @@ -655,7 +650,7 @@ def volumes_describe( @volumes.command("list") @click.pass_context -@_print_errors +@_handle_jig_errors def volumes_list(ctx: click.Context) -> None: """List all volumes""" client: JigResource = ctx.obj.beta.jig @@ -1323,8 +1318,7 @@ def init() -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command def dockerfile(jig: Jig) -> None: """Generate Dockerfile""" if _dockerfile(jig.config): @@ -1335,8 +1329,7 @@ def dockerfile(jig: Jig) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--tag", default="latest", help="Image tag") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") @click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") @@ -1346,8 +1339,7 @@ def build(jig: Jig, tag: str, warmup: bool, docker_args: str | None) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--tag", default="latest", help="Image tag") def push(jig: Jig, tag: str) -> None: """Push image to registry""" @@ -1355,8 +1347,7 @@ def push(jig: Jig, tag: str) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--tag", default="latest", help="Image tag") @click.option("--build-only", is_flag=True, help="Build and push only") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") @@ -1377,8 +1368,7 @@ def deploy( @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") def status(jig: Jig, json_output: bool = False) -> None: """Get deployment status""" @@ -1390,16 +1380,14 @@ def status(jig: Jig, json_output: bool = False) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command def endpoint(jig: Jig) -> None: """Get deployment endpoint URL""" click.echo(f"{_get_api_base_url(jig.together)}/v1/deployment-request/{jig.config.model_name}") @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--follow", is_flag=True, help="Follow log output") def logs(jig: Jig, follow: bool) -> None: """Get deployment logs""" @@ -1407,8 +1395,7 @@ def logs(jig: Jig, follow: bool) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command def destroy(jig: Jig) -> None: """Destroy deployment""" jig.api.destroy(jig.config.model_name) @@ -1416,8 +1403,7 @@ def destroy(jig: Jig) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--prompt", default=None, help="Job prompt") @click.option("--payload", default=None, help="Job payload JSON") @click.option("--watch", is_flag=True, help="Watch job status until completion") @@ -1427,8 +1413,7 @@ def submit(jig: Jig, prompt: str | None, payload: str | None, watch: bool) -> No @click.command() -@_pass_jig -@_print_errors +@_jig_command @click.option("--request-id", 
required=True, help="Job request ID") def job_status(jig: Jig, request_id: str) -> None: """Get status of a specific job""" @@ -1437,8 +1422,7 @@ def job_status(jig: Jig, request_id: str) -> None: @click.command() -@_pass_jig -@_print_errors +@_jig_command def queue_status(jig: Jig) -> None: """Get queue metrics for the deployment""" response = jig.api.queue.with_raw_response.metrics(model=jig.config.model_name) @@ -1446,7 +1430,7 @@ def queue_status(jig: Jig) -> None: @click.command("list") -@_print_errors +@_handle_jig_errors @click.pass_context def list_deployments(ctx: click.Context) -> None: """List all deployments""" From b6eb5e0e4d9ebf204cde16b0a001f2cf916780bf Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 00:42:15 -0500 Subject: [PATCH 43/51] remove explicit ctx hints (16051 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 6a20db19..27311fd4 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -541,9 +541,7 @@ async def _create_volume(client: JigResource, name: str, source: str) -> None: click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") try: volume_response = client.volumes.create( - name=name, - type="readOnly", - content={"type": "files", "source_prefix": source_prefix}, + name=name, type="readOnly", content={"type": "files", "source_prefix": source_prefix} ) click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") except Exception as e: @@ -599,8 +597,7 @@ def volumes(ctx: click.Context) -> None: @_handle_jig_errors def volumes_create(ctx: click.Context, name: str, source: str) -> None: """Create a volume and upload files""" - client: JigResource = ctx.obj.beta.jig - asyncio.run(_create_volume(client, name, source)) + asyncio.run(_create_volume(ctx.obj.beta.jig, name, source)) @volumes.command("update") @@ -610,8 +607,7 @@ def volumes_create(ctx: click.Context, name: str, source: str) -> None: @_handle_jig_errors def volumes_update(ctx: click.Context, name: str, source: str) -> None: """Update a volume and re-upload files""" - client: JigResource = ctx.obj.beta.jig - asyncio.run(_update_volume(client, name, source)) + asyncio.run(_update_volume(ctx.obj.beta.jig, name, source)) @volumes.command("delete") @@ -620,10 +616,8 @@ def volumes_update(ctx: click.Context, name: str, source: str) -> None: @_handle_jig_errors def volumes_delete(ctx: click.Context, name: str) -> None: """Delete a volume""" - client: JigResource = ctx.obj.beta.jig - try: - client.volumes.delete(name) + ctx.obj.beta.jig.volumes.delete(name) click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") except APIStatusError as e: if e.status_code != 404: @@ -637,10 +631,8 @@ def volumes_delete(ctx: click.Context, name: str) -> None: @_handle_jig_errors def volumes_describe(ctx: click.Context, name: str) -> None: """Describe a volume""" - client: JigResource = ctx.obj.beta.jig - try: - response = client.volumes.with_raw_response.retrieve(name) + response = ctx.obj.beta.jig.volumes.with_raw_response.retrieve(name) click.echo(json.dumps(response.json(), indent=2)) except APIStatusError as e: if e.status_code != 404: @@ -653,8 +645,7 @@ def volumes_describe(ctx: click.Context, name: str) -> None: @_handle_jig_errors def volumes_list(ctx: click.Context) -> None: """List all volumes""" - client: JigResource = ctx.obj.beta.jig - 
response = client.volumes.with_raw_response.list() + response = ctx.obj.beta.jig.volumes.with_raw_response.list() click.echo(json.dumps(response.json(), indent=2)) From 3c373d9e474f3b7ce68ac9efda5d850a6f794a8f Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 00:47:51 -0500 Subject: [PATCH 44/51] factor out repeated options --- src/together/lib/cli/api/beta/jig/jig.py | 28 ++++++++++++++---------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 27311fd4..bfd64fb3 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -589,10 +589,12 @@ def volumes(ctx: click.Context) -> None: """Manage volumes""" pass +volume_name_option = click.option("--name", required=True, help="Volume name") + @volumes.command("create") @click.pass_context -@click.option("--name", required=True, help="Volume name") +@volume_name_option @click.option("--source", required=True, help="Source directory path") @_handle_jig_errors def volumes_create(ctx: click.Context, name: str, source: str) -> None: @@ -602,7 +604,7 @@ def volumes_create(ctx: click.Context, name: str, source: str) -> None: @volumes.command("update") @click.pass_context -@click.option("--name", required=True, help="Volume name") +@volume_name_option @click.option("--source", required=True, help="New source directory path") @_handle_jig_errors def volumes_update(ctx: click.Context, name: str, source: str) -> None: @@ -612,7 +614,7 @@ def volumes_update(ctx: click.Context, name: str, source: str) -> None: @volumes.command("delete") @click.pass_context -@click.option("--name", required=True, help="Volume name") +@volume_name_option @_handle_jig_errors def volumes_delete(ctx: click.Context, name: str) -> None: """Delete a volume""" @@ -627,7 +629,7 @@ def volumes_delete(ctx: click.Context, name: str) -> None: @volumes.command("describe") @click.pass_context -@click.option("--name", required=True, help="Volume name") +@volume_name_option @_handle_jig_errors def volumes_describe(ctx: click.Context, name: str) -> None: """Describe a volume""" @@ -1318,12 +1320,16 @@ def dockerfile(jig: Jig) -> None: msg = f"ERROR: {jig.config.dockerfile} exists and is not managed by jig. Remove or rename the file to allow jig to manage dockerfile." 
click.echo(msg, err=True) +tag_option = click.option("--tag", default="latest", help="Image tag") +warmup_option = click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") +docker_args_option = click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") +build_options = lambda @click.command() @_jig_command -@click.option("--tag", default="latest", help="Image tag") -@click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") +@tag_option +@warmup_option +@docker_args_option def build(jig: Jig, tag: str, warmup: bool, docker_args: str | None) -> None: """Build container image""" jig.build(tag, warmup, docker_args) @@ -1331,7 +1337,7 @@ def build(jig: Jig, tag: str, warmup: bool, docker_args: str | None) -> None: @click.command() @_jig_command -@click.option("--tag", default="latest", help="Image tag") +@tag_option def push(jig: Jig, tag: str) -> None: """Push image to registry""" jig.push(tag) @@ -1339,10 +1345,10 @@ def push(jig: Jig, tag: str) -> None: @click.command() @_jig_command -@click.option("--tag", default="latest", help="Image tag") +@tag_option @click.option("--build-only", is_flag=True, help="Build and push only") -@click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") +@warmup_option +@docker_args_option @click.option("--image", "existing_image", default=None, help="Use existing image (skip build/push)") @click.option("--detach", "detach", is_flag=True, help="Do not wait for deployment to complete") def deploy( From 05811c0db40690135f5374c91ab3a97b266c1b4d Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 00:52:42 -0500 Subject: [PATCH 45/51] import echo and Context from click (15794 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 209 ++++++++++++----------- 1 file changed, 107 insertions(+), 102 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index bfd64fb3..256b583e 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -23,6 +23,7 @@ from urllib.parse import urlparse import click +from click import Context, echo from click.exceptions import Exit from together import Together @@ -206,7 +207,7 @@ def load(cls, data: dict[str, Any], path: Path) -> Config: else: name = path.resolve().parent.name tip = "rename your folder or add `project.name` to your pyproject.toml" - click.echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") + echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") else: jig_config = data if name := jig_config.get("name"): @@ -214,7 +215,7 @@ def load(cls, data: dict[str, Any], path: Path) -> Config: else: name = path.resolve().parent.name tip = f"rename your folder or add `name` to {path}" - click.echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") + echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") if autoscaling := jig_config.get("autoscaling", {}): autoscaling["model"] = name @@ -400,13 +401,13 @@ def _set_secret(jig: Jig, name: str, value: str, description: str) -> None: try: jig.api.secrets.retrieve(scoped_name) jig.api.secrets.update(id=scoped_name, name=scoped_name, 
description=description, value=value) - click.echo(f"\N{CHECK MARK} Updated secret: '{name}'") + echo(f"\N{CHECK MARK} Updated secret: '{name}'") except APIStatusError as e: if e.status_code != 404: raise - click.echo("\N{ROCKET} Creating new secret") + echo("\N{ROCKET} Creating new secret") jig.api.secrets.create(name=scoped_name, value=value, description=description) - click.echo(f"\N{CHECK MARK} Created secret: {name}") + echo(f"\N{CHECK MARK} Created secret: {name}") jig.state.secrets[name] = scoped_name jig.state.save() @@ -418,8 +419,8 @@ class JigError(Exception): def jig_fail(msg: str) -> None: prefix = click.style("Jig: ", fg="blue") - click.echo(prefix + click.style("Failed", fg="red"), err=True) - click.echo(prefix + click.style(msg, fg="red"), err=True) + echo(prefix + click.style("Failed", fg="red"), err=True) + echo(prefix + click.style(msg, fg="red"), err=True) def _handle_jig_errors(f: Callable[..., Any]) -> Any: @@ -450,7 +451,7 @@ def _jig_command(f: Callable[..., Any]) -> Any: @click.pass_context @click.option("-c", "--config", "config_path", default=None, help="Configuration file path") @wraps(f) - def wrapper(ctx: click.Context, config_path: str | None, *args: Any, **kwargs: Any) -> None: + def wrapper(ctx: Context, config_path: str | None, *args: Any, **kwargs: Any) -> None: f(Jig(ctx.obj, config_path), *args, **kwargs) return wrapper @@ -458,7 +459,7 @@ def wrapper(ctx: click.Context, config_path: str | None, *args: Any, **kwargs: A @click.group() @click.pass_context -def secrets(ctx: click.Context) -> None: +def secrets(ctx: Context) -> None: """Manage deployment secrets""" pass @@ -481,9 +482,9 @@ def secrets_unset(jig: Jig, name: str) -> None: try: del jig.state.secrets[name] jig.state.save() - click.echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state") + echo(f"\N{CHECK MARK} Deleted secret '{name}' from local state") except KeyError: - click.echo(f"\N{CROSS MARK} Secret '{name}' is not set") + echo(f"\N{CROSS MARK} Secret '{name}' is not set") @secrets.command("list") @@ -501,11 +502,11 @@ def secrets_list(jig: Jig) -> None: remote_secrets.add(name.removeprefix(prefix)) if not local_secrets and not remote_secrets: - click.echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{jig.config.model_name}'") + echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{jig.config.model_name}'") return - click.echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{jig.config.model_name}':") - click.echo() + echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{jig.config.model_name}':") + echo() for name in sorted(local_secrets | remote_secrets): in_local = name in local_secrets @@ -518,7 +519,7 @@ def secrets_list(jig: Jig) -> None: else: status = click.style("remote only", fg="yellow") - click.echo(f" - {name} [{status}]") + echo(f" - {name} [{status}]") # == Volumes == @@ -538,24 +539,24 @@ async def _create_volume(client: JigResource, name: str, source: str) -> None: _validate_source(source_path) source_prefix = f"{name}/{source_path.name}" - click.echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") + echo(f"\N{ROCKET} Creating volume '{name}' with source prefix '{source_prefix}'") try: volume_response = client.volumes.create( name=name, type="readOnly", content={"type": "files", "source_prefix": source_prefix} ) - click.echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") + echo(f"\N{CHECK MARK} Volume created: {volume_response.id}") except Exception as e: raise JigError(f"Failed to create 
volume: {e}") from e try: await Uploader(client._client).upload_files(source_path, volume_name=name) except Exception as e: - click.echo(f"\N{CROSS MARK} Upload failed: {e}") - click.echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'") + echo(f"\N{CROSS MARK} Upload failed: {e}") + echo(f"\N{WASTEBASKET} Cleaning up volume '{name}'") try: client.volumes.delete(name) except Exception as cleanup_error: - click.echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") + echo(f"\N{WARNING SIGN} Failed to delete volume: {cleanup_error}") raise Exit(1) from None @@ -572,12 +573,12 @@ async def _update_volume(client: JigResource, name: str, source: str) -> None: source_prefix = f"{name}/{source_path.name}" - click.echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") + echo(f"\N{INFORMATION SOURCE} Uploading files for volume '{name}'") await Uploader(client._client).upload_files(source_path, volume_name=name) - click.echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") + echo(f"\N{INFORMATION SOURCE} Updating volume '{name}' with source prefix '{source_prefix}'") client.volumes.update(name, content={"type": "files", "source_prefix": source_prefix}) - click.echo("\N{CHECK MARK} Volume updated successfully") + echo("\N{CHECK MARK} Volume updated successfully") # --- Volumes CLI Commands --- @@ -585,10 +586,11 @@ async def _update_volume(client: JigResource, name: str, source: str) -> None: @click.group() @click.pass_context -def volumes(ctx: click.Context) -> None: +def volumes(ctx: Context) -> None: """Manage volumes""" pass + volume_name_option = click.option("--name", required=True, help="Volume name") @@ -597,7 +599,7 @@ def volumes(ctx: click.Context) -> None: @volume_name_option @click.option("--source", required=True, help="Source directory path") @_handle_jig_errors -def volumes_create(ctx: click.Context, name: str, source: str) -> None: +def volumes_create(ctx: Context, name: str, source: str) -> None: """Create a volume and upload files""" asyncio.run(_create_volume(ctx.obj.beta.jig, name, source)) @@ -607,7 +609,7 @@ def volumes_create(ctx: click.Context, name: str, source: str) -> None: @volume_name_option @click.option("--source", required=True, help="New source directory path") @_handle_jig_errors -def volumes_update(ctx: click.Context, name: str, source: str) -> None: +def volumes_update(ctx: Context, name: str, source: str) -> None: """Update a volume and re-upload files""" asyncio.run(_update_volume(ctx.obj.beta.jig, name, source)) @@ -616,39 +618,39 @@ def volumes_update(ctx: click.Context, name: str, source: str) -> None: @click.pass_context @volume_name_option @_handle_jig_errors -def volumes_delete(ctx: click.Context, name: str) -> None: +def volumes_delete(ctx: Context, name: str) -> None: """Delete a volume""" try: ctx.obj.beta.jig.volumes.delete(name) - click.echo(f"\N{CHECK MARK} Deleted volume '{name}'") + echo(f"\N{CHECK MARK} Deleted volume '{name}'") except APIStatusError as e: if e.status_code != 404: raise - click.echo(f"\N{CROSS MARK} Volume '{name}' not found") + echo(f"\N{CROSS MARK} Volume '{name}' not found") @volumes.command("describe") @click.pass_context @volume_name_option @_handle_jig_errors -def volumes_describe(ctx: click.Context, name: str) -> None: +def volumes_describe(ctx: Context, name: str) -> None: """Describe a volume""" try: response = ctx.obj.beta.jig.volumes.with_raw_response.retrieve(name) - click.echo(json.dumps(response.json(), indent=2)) + echo(json.dumps(response.json(), 
indent=2)) except APIStatusError as e: if e.status_code != 404: raise - click.echo(f"\N{CROSS MARK} Volume '{name}' not found") + echo(f"\N{CROSS MARK} Volume '{name}' not found") @volumes.command("list") @click.pass_context @_handle_jig_errors -def volumes_list(ctx: click.Context) -> None: +def volumes_list(ctx: Context) -> None: """List all volumes""" response = ctx.obj.beta.jig.volumes.with_raw_response.list() - click.echo(json.dumps(response.json(), indent=2)) + echo(json.dumps(response.json(), indent=2)) # == Main CLI == @@ -791,7 +793,7 @@ def _build_warm_image(base_image: str) -> None: pass cache_dir.mkdir(exist_ok=True) - click.echo("\N{FIRE} Running warmup to generate compile cache...") + echo("\N{FIRE} Running warmup to generate compile cache...") # Run container with GPU and RUN_AND_EXIT=1 # Mount current dir as /app so warmup_inputs can reference local weights @@ -805,7 +807,7 @@ def _build_warm_image(base_image: str) -> None: cmd.extend(["-e", f"MODEL_PRELOAD_PATH={weights_path}"]) cmd.append(base_image) - click.echo(f"Running: {' '.join(cmd)}") + echo(f"Running: {' '.join(cmd)}") result = subprocess.run(cmd) if result.returncode != 0: raise JigError(f"Warmup failed with code {result.returncode}") @@ -815,19 +817,19 @@ def _build_warm_image(base_image: str) -> None: if not cache_files: raise JigError("Warmup completed but no cache files were generated") - click.echo(f"\N{CHECK MARK} Warmup complete, {len(cache_files)} cache files generated") + echo(f"\N{CHECK MARK} Warmup complete, {len(cache_files)} cache files generated") # Generate cache dockerfile - copy cache to same location used during warmup final_dockerfile = f"""FROM {base_image} COPY {cache_dir.name} /app/{WARMUP_DEST} ENV {WARMUP_ENV_NAME}=/app/{WARMUP_DEST}""" - click.echo("\N{PACKAGE} Building final image with cache...") + echo("\N{PACKAGE} Building final image with cache...") final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image, "-f", "-", "."] if _run(final_cmd, input=final_dockerfile).returncode != 0: raise JigError("Cache image build failed") - click.echo("\N{CHECK MARK} Final image with cache built") + echo("\N{CHECK MARK} Final image with cache built") def _get_current_revision_id(d: Deployment) -> str: @@ -840,22 +842,22 @@ def _get_current_revision_id(d: Deployment) -> str: def _print_replica_failure(event: ReplicaEvents) -> None: if event.replica_status_reason: - click.echo(f" Reason: {event.replica_status_reason}") + echo(f" Reason: {event.replica_status_reason}") if event.replica_status_message: - click.echo(f" Message: {event.replica_status_message}") + echo(f" Message: {event.replica_status_message}") def _fetch_and_print_logs(client: JigResource, deployment_name: str, replica_id: str) -> None: - click.echo(f"\n--- Logs for {replica_id} ---") + echo(f"\n--- Logs for {replica_id} ---") try: if lines := client.retrieve_logs(deployment_name, replica_id=replica_id).lines: for line in lines: - click.echo(line) + echo(line) else: - click.echo("No logs available") + echo("No logs available") except Exception as e: - click.echo(f"Failed to fetch logs: {e}") - click.echo("--- End of logs ---\n") + echo(f"Failed to fetch logs: {e}") + echo("--- End of logs ---\n") class ReplicaTrackingResult(str, Enum): @@ -890,7 +892,7 @@ def track_deployment_progress(self) -> None: """ start_time = time.time() - click.echo("\N{HOURGLASS WITH FLOWING SAND} Deployment in-progress...") + echo("\N{HOURGLASS WITH FLOWING SAND} Deployment in-progress...") try: while time.time() - start_time < self.timeout: 
@@ -899,7 +901,7 @@ def track_deployment_progress(self) -> None: # Handle scale to zero - no replicas expected if deployment.min_replicas == 0 and deployment.desired_replicas == 0: if str(deployment.status) == "ScaledToZero": - click.echo("\N{CHECK MARK} Deployment scaled to zero replicas") + echo("\N{CHECK MARK} Deployment scaled to zero replicas") return # Not yet scaled to zero, wait and retry time.sleep(self.poll_interval) @@ -931,15 +933,15 @@ def track_deployment_progress(self) -> None: time.sleep(self.poll_interval) # Timeout reached - click.echo("\N{CROSS MARK} Deployment tracking timed out after 10 minutes") - click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") - click.echo("Run 'jig status' to check current state.") + echo("\N{CROSS MARK} Deployment tracking timed out after 10 minutes") + echo(f"Deployment '{self.deployment_name}' may still be in progress.") + echo("Run 'jig status' to check current state.") raise Exit(1) except KeyboardInterrupt: - click.echo("\n\N{WARNING SIGN} Deployment tracking interrupted") - click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") - click.echo("Run 'jig status' to check current state.") + echo("\n\N{WARNING SIGN} Deployment tracking interrupted") + echo(f"Deployment '{self.deployment_name}' may still be in progress.") + echo("Run 'jig status' to check current state.") raise Exit(130) from None def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> ReplicaTrackingResult: @@ -950,10 +952,10 @@ def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> Replic # Track volume preload progress if event.volume_preload_status: if "volume_preload_started" not in states: - click.echo(f"\N{PACKAGE} [{replica_id}] Preloading volume contents...") + echo(f"\N{PACKAGE} [{replica_id}] Preloading volume contents...") states.add("volume_preload_started") elif volume_done and "volume_preload_completed" not in states: - click.echo( + echo( f"\N{CHECK MARK} [{replica_id}] Successfully preloaded volume contents. " "Attaching the volume to the container..." 
) @@ -965,14 +967,14 @@ def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> Replic # Check if ready - SUCCESS if event.replica_status == "Running" and event.replica_ready_since: - click.echo(f"\N{CHECK MARK} [{replica_id}] Container is running and ready") - click.echo("\N{ROCKET} Deployment successful!") - click.echo("Note: Additional replicas may still be scaling up.") + echo(f"\N{CHECK MARK} [{replica_id}] Container is running and ready") + echo("\N{ROCKET} Deployment successful!") + echo("Note: Additional replicas may still be scaling up.") return ReplicaTrackingResult.SUCCESS # Check for CrashLoopBackOff if event.replica_status_reason == "CrashLoopBackOff": - click.echo(f"\N{CROSS MARK} [{replica_id}] Container is crash looping") + echo(f"\N{CROSS MARK} [{replica_id}] Container is crash looping") _print_replica_failure(event) _fetch_and_print_logs(self.client, self.deployment_name, replica_id) return ReplicaTrackingResult.FAILURE @@ -981,13 +983,13 @@ def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> Replic if event.replica_status == "Running" and volume_done: # replica_wait_start will default to time.time() if time.time() - self.replica_wait_start[replica_id] > self.ready_timeout: - click.echo( + echo( f"\N{CROSS MARK} [{replica_id}] Container is running but " f"not ready to serve requests after {self.ready_timeout} seconds" ) _print_replica_failure(event) _fetch_and_print_logs(self.client, self.deployment_name, replica_id) - click.echo(f"Deployment '{self.deployment_name}' may still be in progress.") + echo(f"Deployment '{self.deployment_name}' may still be in progress.") return ReplicaTrackingResult.FAILURE # Print status updates deduplicated by status + reason @@ -996,11 +998,11 @@ def process_replica_event(self, replica_id: str, event: ReplicaEvents) -> Replic status_key = f"{event.replica_status}_{event.replica_status_reason}" if status_key not in states: states.add(status_key) - click.echo( + echo( f"\N{HOURGLASS WITH FLOWING SAND} [{replica_id}] {event.replica_status}: {event.replica_status_reason}" ) if event.replica_status_message: - click.echo(f" {event.replica_status_message}") + echo(f" {event.replica_status_message}") return ReplicaTrackingResult.CONTINUE @@ -1074,11 +1076,11 @@ def build(self, tag: str = "latest", warmup: bool = False, docker_args: str | No image = self._image(tag) if _dockerfile(self.config): - click.echo("\N{CHECK MARK} Generated Dockerfile") + echo("\N{CHECK MARK} Generated Dockerfile") else: - click.echo(f"\N{INFORMATION SOURCE} Using existing {self.config.dockerfile} (not managed by jig)") + echo(f"\N{INFORMATION SOURCE} Using existing {self.config.dockerfile} (not managed by jig)") - click.echo(f"Building {image}") + echo(f"Building {image}") cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image, "."] if self.config.dockerfile != "Dockerfile": cmd.extend(["-f", self.config.dockerfile]) @@ -1089,7 +1091,7 @@ def build(self, tag: str = "latest", warmup: bool = False, docker_args: str | No if subprocess.run(cmd).returncode != 0: raise JigError("Build failed") - click.echo("\N{CHECK MARK} Built") + echo("\N{CHECK MARK} Built") if warmup: _build_warm_image(image) @@ -1103,10 +1105,10 @@ def push(self, tag: str = "latest") -> None: if _run(login_cmd, input=self.together.api_key).returncode != 0: raise JigError("Registry login failed") - click.echo(f"Pushing {image}") + echo(f"Pushing {image}") if subprocess.run(["docker", "push", image]).returncode != 0: raise JigError("Push failed") - 
click.echo("\N{CHECK MARK} Pushed") + echo("\N{CHECK MARK} Pushed") def _build_deploy_data(self, image: str) -> dict[str, Any]: """Build the deployment API payload.""" @@ -1165,30 +1167,30 @@ def deploy( deployment_image = self._image_with_digest(tag) if build_only: - click.echo("\N{CHECK MARK} Build complete (--build-only)") + echo("\N{CHECK MARK} Build complete (--build-only)") return deploy_data = self._build_deploy_data(deployment_image) if DEBUG: - click.echo(json.dumps(deploy_data, indent=2)) - click.echo(f"Deploying model: {self.config.model_name}") + echo(json.dumps(deploy_data, indent=2)) + echo(f"Deploying model: {self.config.model_name}") try: existing = self.api.retrieve(self.config.model_name) old_revision_id = _get_current_revision_id(existing) was_scaled_to_zero = existing.ready_replicas == 0 response = self.api.update(self.config.model_name, **deploy_data) - click.echo("\N{CHECK MARK} Applied new deployment configuration") + echo("\N{CHECK MARK} Applied new deployment configuration") except APIStatusError as e: if e.status_code != 404: raise old_revision_id = "" was_scaled_to_zero = False - click.echo("\N{ROCKET} Creating new deployment") + echo("\N{ROCKET} Creating new deployment") try: response = self.api.deploy(**deploy_data) - click.echo(f"\N{CHECK MARK} Deployed: {self.config.model_name}") + echo(f"\N{CHECK MARK} Deployed: {self.config.model_name}") except APIStatusError as e: if _is_not_unique_error(e): raise JigError(f"Deployment name must be unique. Tip: {self.config._unique_name_tip}") from None @@ -1196,7 +1198,7 @@ def deploy( raise if detach: - click.echo(json.dumps(response.model_dump(), indent=2)) + echo(json.dumps(response.model_dump(), indent=2)) return new_revision_id = _get_current_revision_id(response) @@ -1212,9 +1214,9 @@ def logs(self, follow: bool = False) -> None: if not follow: if lines := self.api.retrieve_logs(self.config.model_name).lines: for line in lines: - click.echo(line) + echo(line) else: - click.echo("No logs available") + echo("No logs available") return try: @@ -1222,11 +1224,11 @@ def logs(self, follow: bool = False) -> None: for line in stream.iter_lines(): if line: for log_line in json.loads(line).get("lines", []): - click.echo(log_line) + echo(log_line) except KeyboardInterrupt: - click.echo("\nStopped following logs") + echo("\nStopped following logs") except Exception as e: - click.echo(f"\nConnection ended: {e}") + echo(f"\nConnection ended: {e}") def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: """Submit a job and optionally watch for completion.""" @@ -1242,13 +1244,13 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: # Raw response due to Stainless limitation with Pydantic aliases submit_response = QueueSubmitResponse.model_validate_json(raw_response.read()) - click.echo("\N{CHECK MARK} Submitted job") - click.echo(submit_response.model_dump_json(indent=2)) + echo("\N{CHECK MARK} Submitted job") + echo(submit_response.model_dump_json(indent=2)) if not watch or not submit_response.request_id: return - click.echo(f"\nWatching job {submit_response.request_id}...") + echo(f"\nWatching job {submit_response.request_id}...") last_status: str | None = None while True: try: @@ -1258,7 +1260,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: ) current_status = response.status if current_status != last_status: - click.echo(response.model_dump_json(indent=2)) + echo(response.model_dump_json(indent=2)) last_status = current_status if 
current_status in ["done", "failed", "finished", "error", "canceled"]: @@ -1269,7 +1271,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: time.sleep(1) except KeyboardInterrupt: - click.echo(f"\nStopped watching {submit_response.request_id}") + echo(f"\nStopped watching {submit_response.request_id}") raise Exit(130) from None @@ -1280,7 +1282,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: def init() -> None: """Initialize jig configuration""" if (pyproject := Path("pyproject.toml")).exists(): - click.echo("pyproject.toml already exists") + echo("pyproject.toml already exists") return content = """[project] @@ -1306,8 +1308,8 @@ def init() -> None: gpu_count = 1 """ pyproject.write_text(content) - click.echo("\N{CHECK MARK} Created pyproject.toml") - click.echo(" Edit the configuration and run 'jig deploy'") + echo("\N{CHECK MARK} Created pyproject.toml") + echo(" Edit the configuration and run 'jig deploy'") @click.command() @@ -1315,15 +1317,18 @@ def init() -> None: def dockerfile(jig: Jig) -> None: """Generate Dockerfile""" if _dockerfile(jig.config): - click.echo("\N{CHECK MARK} Generated Dockerfile") + echo("\N{CHECK MARK} Generated Dockerfile") else: msg = f"ERROR: {jig.config.dockerfile} exists and is not managed by jig. Remove or rename the file to allow jig to manage dockerfile." - click.echo(msg, err=True) + echo(msg, err=True) + tag_option = click.option("--tag", default="latest", help="Image tag") warmup_option = click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -docker_args_option = click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") -build_options = lambda +docker_args_option = click.option( + "--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)" +) + @click.command() @_jig_command @@ -1371,16 +1376,16 @@ def status(jig: Jig, json_output: bool = False) -> None: """Get deployment status""" response = jig.api.retrieve(jig.config.model_name) if json_output: - click.echo(response.model_dump_json(indent=2)) + echo(response.model_dump_json(indent=2)) else: - click.echo(format_deployment_status(response)) + echo(format_deployment_status(response)) @click.command() @_jig_command def endpoint(jig: Jig) -> None: """Get deployment endpoint URL""" - click.echo(f"{_get_api_base_url(jig.together)}/v1/deployment-request/{jig.config.model_name}") + echo(f"{_get_api_base_url(jig.together)}/v1/deployment-request/{jig.config.model_name}") @click.command() @@ -1396,7 +1401,7 @@ def logs(jig: Jig, follow: bool) -> None: def destroy(jig: Jig) -> None: """Destroy deployment""" jig.api.destroy(jig.config.model_name) - click.echo(f"\N{WASTEBASKET} Destroyed {jig.config.model_name}") + echo(f"\N{WASTEBASKET} Destroyed {jig.config.model_name}") @click.command() @@ -1415,7 +1420,7 @@ def submit(jig: Jig, prompt: str | None, payload: str | None, watch: bool) -> No def job_status(jig: Jig, request_id: str) -> None: """Get status of a specific job""" response = jig.api.queue.retrieve(model=jig.config.model_name, request_id=request_id) - click.echo(response.model_dump_json(indent=2)) + echo(response.model_dump_json(indent=2)) @click.command() @@ -1423,13 +1428,13 @@ def job_status(jig: Jig, request_id: str) -> None: def queue_status(jig: Jig) -> None: """Get queue metrics for the deployment""" response = jig.api.queue.with_raw_response.metrics(model=jig.config.model_name) - 
click.echo(json.dumps(response.json(), indent=2)) + echo(json.dumps(response.json(), indent=2)) @click.command("list") @_handle_jig_errors @click.pass_context -def list_deployments(ctx: click.Context) -> None: +def list_deployments(ctx: Context) -> None: """List all deployments""" response = ctx.obj.beta.jig.with_raw_response.list() - click.echo(json.dumps(response.json(), indent=2)) + echo(json.dumps(response.json(), indent=2)) From 1113839ccb2319ad59bdd7b17aedf04f3fa85350 Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 01:13:27 -0500 Subject: [PATCH 46/51] strengthen error handling --- src/together/lib/cli/api/beta/jig/jig.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 256b583e..b205081c 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -431,7 +431,12 @@ def wrapper(*args: Any, **kwargs: Any) -> None: except (Exit, click.Abort, click.ClickException): raise except APIError as e: - msg = getattr(e.body, "message", str(e.body)) if e.body is not None else str(e) + body = e.body + if isinstance(body, dict): + err = body.get("error", body) + msg = err.get("message", str(err)) if isinstance(err, dict) else str(err) + else: + msg = e.message jig_fail(msg) raise Exit(1) from None except JigError as e: From f3f94652efe0e4175639dab2d3a12d599d799d0e Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 01:16:55 -0500 Subject: [PATCH 47/51] setdefault for volumes before autoscaling (15827 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index b205081c..80f16e07 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -217,13 +217,13 @@ def load(cls, data: dict[str, Any], path: Path) -> Config: tip = f"rename your folder or add `name` to {path}" echo(f"\N{PACKAGE} Name not set in {path} - defaulting to {name}") + # Support volume_mounts at jig level (merge into deploy config) + jig_config.setdefault("deploy", {})["volume_mounts"] = jig_config.get("volume_mounts", []) + if autoscaling := jig_config.get("autoscaling", {}): autoscaling["model"] = name jig_config["deploy"]["autoscaling"] = autoscaling - # Support volume_mounts at jig level (merge into deploy config) - jig_config.setdefault("deploy", {})["volume_mounts"] = jig_config.get("volume_mounts", []) - return cls( image=ImageConfig.from_dict(jig_config.get("image", {})), deploy=DeployConfig.from_dict(jig_config.get("deploy", {})), From 1700425e9a287778007dd588c50fc10ed622aab3 Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 02:50:22 -0500 Subject: [PATCH 48/51] extract _api_err_msg (15855 tokens) --- src/together/lib/cli/api/beta/jig/jig.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 80f16e07..6f54047d 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -423,6 +423,13 @@ def jig_fail(msg: str) -> None: echo(prefix + click.style(msg, fg="red"), err=True) +def _api_err_msg(body: dict[str, Any] | None) -> str | None: + if not isinstance(body, dict): + return None + err = body.get("error", body) + return str(err) if isinstance(err, str) else 
str(err.get("message", err)) + + def _handle_jig_errors(f: Callable[..., Any]) -> Any: @wraps(f) def wrapper(*args: Any, **kwargs: Any) -> None: @@ -431,13 +438,7 @@ def wrapper(*args: Any, **kwargs: Any) -> None: except (Exit, click.Abort, click.ClickException): raise except APIError as e: - body = e.body - if isinstance(body, dict): - err = body.get("error", body) - msg = err.get("message", str(err)) if isinstance(err, dict) else str(err) - else: - msg = e.message - jig_fail(msg) + jig_fail(_api_err_msg(e.body) or e.message) # type: ignore raise Exit(1) from None except JigError as e: jig_fail(str(e)) From d4aeafd9bae81f6a31076fa12e73e56707964cd7 Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 02:56:27 -0500 Subject: [PATCH 49/51] jig.name = jig.config.model_name --- src/together/lib/cli/api/beta/jig/jig.py | 45 ++++++++++++------------ 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 6f54047d..06e61ae4 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -396,7 +396,7 @@ def format_deployment_status(d: Deployment) -> str: def _set_secret(jig: Jig, name: str, value: str, description: str) -> None: """Set secret for the deployment""" - scoped_name = f"{jig.config.model_name}-{name}" + scoped_name = f"{jig.name}-{name}" try: jig.api.secrets.retrieve(scoped_name) @@ -497,7 +497,7 @@ def secrets_unset(jig: Jig, name: str) -> None: @_jig_command def secrets_list(jig: Jig) -> None: """List all secrets with sync status""" - prefix = f"{jig.config.model_name}-" + prefix = f"{jig.name}-" local_secrets = set(jig.state.secrets.keys()) remote_secrets: set[str] = set() @@ -508,10 +508,10 @@ def secrets_list(jig: Jig) -> None: remote_secrets.add(name.removeprefix(prefix)) if not local_secrets and not remote_secrets: - echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{jig.config.model_name}'") + echo(f"\N{INFORMATION SOURCE} No secrets configured for deployment '{jig.name}'") return - echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{jig.config.model_name}':") + echo(f"\N{INFORMATION SOURCE} Secrets for deployment '{jig.name}':") echo() for name in sorted(local_secrets | remote_secrets): @@ -1043,7 +1043,8 @@ def __init__(self, client: Together, config_path: str | None = None) -> None: self.together = client self.api: JigResource = client.beta.jig self.config = Config.find(config_path) - self.state = State.load(self.config._path.parent, self.config.model_name) + self.name = self.config.model_name + self.state = State.load(self.config._path.parent, self.name) def _ensure_registry(self) -> None: """Ensure registry base path is set in state""" @@ -1057,7 +1058,7 @@ def _ensure_registry(self) -> None: self.state.save() def _image(self, tag: str = "latest") -> str: - return f"{self.state.registry_base_path}/{self.config.model_name}:{tag}" + return f"{self.state.registry_base_path}/{self.name}:{tag}" def _image_with_digest(self, tag: str = "latest") -> str: image_name = self._image(tag) @@ -1119,7 +1120,7 @@ def push(self, tag: str = "latest") -> None: def _build_deploy_data(self, image: str) -> dict[str, Any]: """Build the deployment API payload.""" deploy_data: dict[str, Any] = { - "name": self.config.model_name, + "name": self.name, "description": self.config.deploy.description, "image": image, "min_replicas": self.config.deploy.min_replicas, @@ -1180,13 +1181,13 @@ def deploy( if DEBUG: 
echo(json.dumps(deploy_data, indent=2)) - echo(f"Deploying model: {self.config.model_name}") + echo(f"Deploying model: {self.name}") try: - existing = self.api.retrieve(self.config.model_name) + existing = self.api.retrieve(self.name) old_revision_id = _get_current_revision_id(existing) was_scaled_to_zero = existing.ready_replicas == 0 - response = self.api.update(self.config.model_name, **deploy_data) + response = self.api.update(self.name, **deploy_data) echo("\N{CHECK MARK} Applied new deployment configuration") except APIStatusError as e: if e.status_code != 404: @@ -1196,7 +1197,7 @@ def deploy( echo("\N{ROCKET} Creating new deployment") try: response = self.api.deploy(**deploy_data) - echo(f"\N{CHECK MARK} Deployed: {self.config.model_name}") + echo(f"\N{CHECK MARK} Deployed: {self.name}") except APIStatusError as e: if _is_not_unique_error(e): raise JigError(f"Deployment name must be unique. Tip: {self.config._unique_name_tip}") from None @@ -1212,13 +1213,13 @@ def deploy( if old_revision_id and old_revision_id == new_revision_id and not scaling_up: return - Tracker(self.api, self.config.model_name).track_deployment_progress() + Tracker(self.api, self.name).track_deployment_progress() # == Query commands == def logs(self, follow: bool = False) -> None: if not follow: - if lines := self.api.retrieve_logs(self.config.model_name).lines: + if lines := self.api.retrieve_logs(self.name).lines: for line in lines: echo(line) else: @@ -1226,7 +1227,7 @@ def logs(self, follow: bool = False) -> None: return try: - with self.api.with_streaming_response.retrieve_logs(self.config.model_name) as stream: + with self.api.with_streaming_response.retrieve_logs(self.name) as stream: for line in stream.iter_lines(): if line: for log_line in json.loads(line).get("lines", []): @@ -1242,7 +1243,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: raise click.UsageError("Either --prompt or --payload required") raw_response = self.api.queue.with_raw_response.submit( - model=self.config.model_name, + model=self.name, payload=json.loads(payload) if payload else {"prompt": prompt}, priority=1, ) @@ -1261,7 +1262,7 @@ def submit(self, prompt: str | None, payload: str | None, watch: bool) -> None: while True: try: response = self.api.queue.retrieve( - model=self.config.model_name, + model=self.name, request_id=submit_response.request_id, ) current_status = response.status @@ -1380,7 +1381,7 @@ def deploy( @click.option("--json", "json_output", is_flag=True, help="Output raw JSON") def status(jig: Jig, json_output: bool = False) -> None: """Get deployment status""" - response = jig.api.retrieve(jig.config.model_name) + response = jig.api.retrieve(jig.name) if json_output: echo(response.model_dump_json(indent=2)) else: @@ -1391,7 +1392,7 @@ def status(jig: Jig, json_output: bool = False) -> None: @_jig_command def endpoint(jig: Jig) -> None: """Get deployment endpoint URL""" - echo(f"{_get_api_base_url(jig.together)}/v1/deployment-request/{jig.config.model_name}") + echo(f"{_get_api_base_url(jig.together)}/v1/deployment-request/{jig.name}") @click.command() @@ -1406,8 +1407,8 @@ def logs(jig: Jig, follow: bool) -> None: @_jig_command def destroy(jig: Jig) -> None: """Destroy deployment""" - jig.api.destroy(jig.config.model_name) - echo(f"\N{WASTEBASKET} Destroyed {jig.config.model_name}") + jig.api.destroy(jig.name) + echo(f"\N{WASTEBASKET} Destroyed {jig.name}") @click.command() @@ -1425,7 +1426,7 @@ def submit(jig: Jig, prompt: str | None, payload: str | None, watch: bool) -> No 
@click.option("--request-id", required=True, help="Job request ID") def job_status(jig: Jig, request_id: str) -> None: """Get status of a specific job""" - response = jig.api.queue.retrieve(model=jig.config.model_name, request_id=request_id) + response = jig.api.queue.retrieve(model=jig.name, request_id=request_id) echo(response.model_dump_json(indent=2)) @@ -1433,7 +1434,7 @@ def job_status(jig: Jig, request_id: str) -> None: @_jig_command def queue_status(jig: Jig) -> None: """Get queue metrics for the deployment""" - response = jig.api.queue.with_raw_response.metrics(model=jig.config.model_name) + response = jig.api.queue.with_raw_response.metrics(model=jig.name) echo(json.dumps(response.json(), indent=2)) From 17c6cfa2c80a05c8a3128e58be1158001fd58c69 Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 17:42:26 -0500 Subject: [PATCH 50/51] fix pyright being dumb about fields (https://github.com/microsoft/pyright/issues/10277) --- src/together/lib/cli/api/beta/jig/jig.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 06e61ae4..829335cf 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -60,11 +60,11 @@ class ImageConfig: """Container image configuration from pyproject.toml""" python_version: str = "3.11" - system_packages: list[str] = field(default_factory=list) - environment: dict[str, str] = field(default_factory=dict) - run: list[str] = field(default_factory=list) + system_packages: list[str] = field(default_factory=list[str]) + environment: dict[str, str] = field(default_factory=dict[str, str]) + run: list[str] = field(default_factory=list[str]) cmd: str = "python app.py" - copy: list[str] = field(default_factory=list) + copy: list[str] = field(default_factory=list[str]) auto_include_git: bool = False @classmethod @@ -100,12 +100,12 @@ class DeployConfig: min_replicas: int = 1 max_replicas: int = 1 port: int = 8000 - environment_variables: dict[str, str] = field(default_factory=dict) + environment_variables: dict[str, str] = field(default_factory=dict[str, str]) command: list[str] | None = None - autoscaling: dict[str, str] = field(default_factory=dict) + autoscaling: dict[str, str] = field(default_factory=dict[str, str]) health_check_path: str = "/health" termination_grace_period_seconds: int = 300 - volume_mounts: list[VolumeMount] = field(default_factory=list) + volume_mounts: list[VolumeMount] = field(default_factory=list[VolumeMount]) @classmethod def from_dict(cls, data: dict[str, Any]) -> DeployConfig: @@ -244,8 +244,8 @@ class State: _config_dir: Path _project_name: str registry_base_path: str = "" - secrets: dict[str, str] = field(default_factory=dict) - volumes: dict[str, str] = field(default_factory=dict) + secrets: dict[str, str] = field(default_factory=dict[str, str]) + volumes: dict[str, str] = field(default_factory=dict[str, str]) @classmethod def from_dict(cls, config_dir: Path, project_name: str, **data: Any) -> State: From d9c0906e81f5b3b0eea3a2e1b316ad55738a75b4 Mon Sep 17 00:00:00 2001 From: technillogue Date: Thu, 19 Feb 2026 17:42:39 -0500 Subject: [PATCH 51/51] simplify NotFoundError checks --- src/together/lib/cli/api/beta/jig/jig.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 829335cf..c638b04a 100644 --- 
a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -27,7 +27,7 @@ from click.exceptions import Exit from together import Together -from together._exceptions import APIError, APIStatusError +from together._exceptions import APIError, NotFoundError, APIStatusError from together.types.beta.deployment import Deployment, ReplicaEvents from together.resources.beta.jig.jig import JigResource from together.lib.cli.api.beta.jig._uploader import Uploader @@ -402,9 +402,7 @@ def _set_secret(jig: Jig, name: str, value: str, description: str) -> None: jig.api.secrets.retrieve(scoped_name) jig.api.secrets.update(id=scoped_name, name=scoped_name, description=description, value=value) echo(f"\N{CHECK MARK} Updated secret: '{name}'") - except APIStatusError as e: - if e.status_code != 404: - raise + except NotFoundError: echo("\N{ROCKET} Creating new secret") jig.api.secrets.create(name=scoped_name, value=value, description=description) echo(f"\N{CHECK MARK} Created secret: {name}") @@ -438,7 +436,7 @@ def wrapper(*args: Any, **kwargs: Any) -> None: except (Exit, click.Abort, click.ClickException): raise except APIError as e: - jig_fail(_api_err_msg(e.body) or e.message) # type: ignore + jig_fail(_api_err_msg(e.body) or e.message) # type: ignore raise Exit(1) from None except JigError as e: jig_fail(str(e)) @@ -572,10 +570,8 @@ async def _update_volume(client: JigResource, name: str, source: str) -> None: _validate_source(source_path) try: client.volumes.retrieve(name) - except APIStatusError as e: - if e.status_code == 404: - raise JigError(f"Volume '{name}' does not exist") from e - raise + except NotFoundError: + raise JigError(f"Volume '{name}' does not exist") from None source_prefix = f"{name}/{source_path.name}"
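
Note on the error-handling shape this series converges on (PATCH 42, 46, 48, 51): every command funnels through `_handle_jig_errors`, which turns SDK and jig failures into a red `Jig: Failed` line plus exit code 1; `_api_err_msg` digs the human-readable message out of a nested `{"error": {"message": ...}}` body; and `NotFoundError` replaces the `APIStatusError` + `status_code == 404` dance at call sites. The sketch below is a minimal, self-contained illustration of how the three pieces compose — the exception classes are stand-ins for `together._exceptions` (assuming the real types carry `message`/`status_code`/`body`), and `fake_delete` is a hypothetical call invented for the demo, not part of the SDK:

from __future__ import annotations

import sys
from functools import wraps
from typing import Any, Callable


class APIStatusError(Exception):
    """Stand-in for together._exceptions.APIStatusError."""

    def __init__(self, message: str, status_code: int, body: Any = None) -> None:
        super().__init__(message)
        self.message = message
        self.status_code = status_code
        self.body = body


class NotFoundError(APIStatusError):
    """Stand-in for the SDK's 404 subclass, as PATCH 51 uses it."""

    def __init__(self, message: str = "not found", body: Any = None) -> None:
        super().__init__(message, 404, body)


class JigError(Exception):
    pass


def _api_err_msg(body: Any) -> str | None:
    # As in PATCH 48: prefer body["error"]["message"], fall back to the raw error.
    if not isinstance(body, dict):
        return None
    err = body.get("error", body)
    if isinstance(err, dict):
        return str(err.get("message", err))
    return str(err)


def _handle_jig_errors(f: Callable[..., Any]) -> Callable[..., None]:
    # One decorator converts any SDK/jig failure into a message + exit 1.
    @wraps(f)
    def wrapper(*args: Any, **kwargs: Any) -> None:
        try:
            f(*args, **kwargs)
        except (JigError, APIStatusError) as e:
            msg = _api_err_msg(getattr(e, "body", None)) or str(e)
            print(f"Jig: Failed\nJig: {msg}", file=sys.stderr)
            raise SystemExit(1) from None

    return wrapper


def fake_delete(name: str) -> None:
    # Hypothetical API call used only to exercise the sketch.
    raise NotFoundError(body={"error": {"message": f"volume {name} missing"}})


@_handle_jig_errors
def volumes_delete(name: str) -> None:
    # One narrow except clause instead of catching APIStatusError and
    # re-raising when status_code != 404 (the shape PATCH 51 removes).
    try:
        fake_delete(name)
        print(f"Deleted volume '{name}'")
    except NotFoundError:
        print(f"Volume '{name}' not found")


if __name__ == "__main__":
    volumes_delete("demo")  # prints: Volume 'demo' not found

Catching `NotFoundError` inside the command keeps "missing" as an expected, user-facing outcome, while every other API error still bubbles up to the decorator for a uniform failure message.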