diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db4bde2a..4c20f132 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,9 @@ jobs: - name: Run lints run: ./scripts/lint + - name: Run formatting + run: uv run ruff format --check + build: if: github.event_name == 'push' || github.event.pull_request.head.repo.fork timeout-minutes: 10 diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 19588bdd..bfc26f9c 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.1.1" + ".": "2.2.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index f28922fe..64b08e45 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 74 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-af83378ff78b22014ab7358ae8aa060cc25e4b818e798f2e09d6deb1226e0ba6.yml -openapi_spec_hash: 113f84b407b43bd991ee6d1afb6efb49 -config_hash: 67b76d1064bef2e591cadf50de08ad19 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-da5b9df3bfe0d31a76c91444c9eba060ad607d7d5a4e7483c5cc3fe2cac0f25e.yml +openapi_spec_hash: 7efd2ae2111f3a9bf190485828a13252 +config_hash: b66198d27b4d5c152688ff6cccfdeab5 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e6ff35c..463657d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +## 2.2.0 (2026-02-19) + +Full Changelog: [v2.1.1...v2.2.0](https://github.com/togethercomputer/together-py/compare/v2.1.1...v2.2.0) + +### Features + +* **cli:** Add json mode to `fine-tuning list --json` ([#269](https://github.com/togethercomputer/together-py/issues/269)) ([13d3551](https://github.com/togethercomputer/together-py/commit/13d35511bb039b3053ecf3e7a90c04e2e2d91237)) +* Improve file uploads and FT create flows with checksums ([#253](https://github.com/togethercomputer/together-py/issues/253)) 
([3095b9a](https://github.com/togethercomputer/together-py/commit/3095b9af2ba564cefe6b64a7ee65450aacfbfa4c)) +* Update descriptions for endpoints ([70900c6](https://github.com/togethercomputer/together-py/commit/70900c6da2e8f60bfd0f70a5497cf41c18008ee5)) + + +### Bug Fixes + +* **cli:** fine-tuning create regression ([#270](https://github.com/togethercomputer/together-py/issues/270)) ([59d0c33](https://github.com/togethercomputer/together-py/commit/59d0c3399643c42e1c6ee9cf74c70aa99104218c)) + + +### Chores + +* Add documentation and changelog to project.urls ([#264](https://github.com/togethercomputer/together-py/issues/264)) ([7b9e574](https://github.com/togethercomputer/together-py/commit/7b9e5749e448042f548a0fbcd5db5ff5bfbb99d7)) +* Better jig deployment progress ([#242](https://github.com/togethercomputer/together-py/issues/242)) ([ba9c50a](https://github.com/togethercomputer/together-py/commit/ba9c50a8b9855ec95e871525a33932e46f470379)) +* **cli:** Improve messaging when attempting to cancel finetune that is not cancellable ([#268](https://github.com/togethercomputer/together-py/issues/268)) ([6502acc](https://github.com/togethercomputer/together-py/commit/6502acc911413abceff3870f620a2bed742e9b08)) +* configure new SDK language ([b312b50](https://github.com/togethercomputer/together-py/commit/b312b502fcff52aa3b877e03928ef6f5a34ed88a)) +* Fix various docstrings ([2e1bd13](https://github.com/togethercomputer/together-py/commit/2e1bd13a49a1ddeb717c072e3b4a4e4c1669f2de)) +* format all `api.md` files ([c16f892](https://github.com/togethercomputer/together-py/commit/c16f89205ebc2a371dfa468bfb9b3b1081e41a4f)) +* format files ([#266](https://github.com/togethercomputer/together-py/issues/266)) ([2a452df](https://github.com/togethercomputer/together-py/commit/2a452df565a93a32963c615a5be3eb23a2e6b713)) +* Refactor argument options with CLI file downloads ([#267](https://github.com/togethercomputer/together-py/issues/267)) 
([642adbd](https://github.com/togethercomputer/together-py/commit/642adbda9f113bf815d63b90a9829367c4fac82e)) +* Remove broken field LineCount from FileResponse ([778a7d9](https://github.com/togethercomputer/together-py/commit/778a7d9e61f1f69feff51a5c908a1d2221e8133d)) +* Remove line_count field from files sdks/clis ([#265](https://github.com/togethercomputer/together-py/issues/265)) ([62c9da6](https://github.com/togethercomputer/together-py/commit/62c9da6efd0c8e8c5f686b45736b8765030e5e5f)) +* Revert adding mcp code. Code additions were unexpected. ([7a322f7](https://github.com/togethercomputer/together-py/commit/7a322f7f3388149418e3a576d93cb0017f5fdecd)) +* update mock server docs ([5bcfbdf](https://github.com/togethercomputer/together-py/commit/5bcfbdf4cd2ff84de834c8df0ecdccb18cac1e35)) + ## 2.1.1 (2026-02-12) Full Changelog: [v2.1.0...v2.1.1](https://github.com/togethercomputer/together-py/compare/v2.1.0...v2.1.1) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f72b4f31..6da2d0c8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -88,8 +88,7 @@ $ pip install ./path-to-wheel-file.whl Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. 
```sh -# you will need npm installed -$ npx prism mock path/to/your/openapi.yml +$ ./scripts/mock ``` ```sh diff --git a/api.md b/api.md index a4e075a4..3cd4cd0d 100644 --- a/api.md +++ b/api.md @@ -158,14 +158,7 @@ Methods: Types: ```python -from together.types import ( - FileList, - FileObject, - FilePurpose, - FileResponse, - FileType, - FileDeleteResponse, -) +from together.types import FileList, FilePurpose, FileResponse, FileType, FileDeleteResponse ``` Methods: diff --git a/pyproject.toml b/pyproject.toml index 0bff01d7..a8ffb9e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "2.1.1" +version = "2.2.0" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" @@ -49,6 +49,8 @@ classifiers = [ [project.urls] Homepage = "https://github.com/togethercomputer/together-py" Repository = "https://github.com/togethercomputer/together-py" +Documentation = "https://docs.together.ai/" +Changelog = "https://github.com/togethercomputer/together-py/blob/main/CHANGELOG.md" [project.optional-dependencies] aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.9"] diff --git a/scripts/format b/scripts/format index 1d2f9c6e..c8e1f69d 100755 --- a/scripts/format +++ b/scripts/format @@ -11,4 +11,4 @@ uv run ruff check --fix . uv run ruff format echo "==> Formatting docs" -uv run python scripts/utils/ruffen-docs.py README.md api.md +uv run python scripts/utils/ruffen-docs.py README.md $(find . -type f -name api.md) diff --git a/src/together/_version.py b/src/together/_version.py index 3e1eed3a..636c0c9b 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "together" -__version__ = "2.1.1" # x-release-please-version +__version__ = "2.2.0" # x-release-please-version diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index dda83675..7140e050 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -10,6 +10,7 @@ import together from together._version import __version__ from together._constants import DEFAULT_TIMEOUT +from together._utils._logs import setup_logging from together.lib.cli.api.beta import beta from together.lib.cli.api.evals import evals from together.lib.cli.api.files import files @@ -58,7 +59,10 @@ def main( max_retries: int | None, ) -> None: """This is a sample CLI tool.""" - os.environ.setdefault("TOGETHER_LOG", "debug" if debug else "info") + if debug: + os.environ.setdefault("TOGETHER_LOG", "debug") + setup_logging() # Must run this again here to allow the new logging configuration to take effect + try: ctx.obj = together.Together( api_key=api_key, diff --git a/src/together/lib/cli/api/_utils.py b/src/together/lib/cli/api/_utils.py index e4195e00..9dc4f471 100644 --- a/src/together/lib/cli/api/_utils.py +++ b/src/together/lib/cli/api/_utils.py @@ -148,8 +148,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: try: return f(*args, **kwargs) # User aborted the command - except click.Abort: - sys.exit(0) + # Re-raise abort and usage errors so they display a proper click message + except (click.Abort, click.UsageError) as e: + raise e except APIError as e: error_msg = "" if e.body is not None: diff --git a/src/together/lib/cli/api/beta/clusters/list_regions.py b/src/together/lib/cli/api/beta/clusters/list_regions.py index 3427ae89..8e669912 100644 --- a/src/together/lib/cli/api/beta/clusters/list_regions.py +++ b/src/together/lib/cli/api/beta/clusters/list_regions.py @@ -30,7 +30,9 @@ def list_regions(ctx: click.Context, json: bool) -> None: data.append( { "Name": region.name, - "Availability Zones": ", 
".join(region.availability_zones) if region.availability_zones else "", + "Supported GPU Types": ", ".join(region.supported_instance_types) + if region.supported_instance_types + else "", "Driver Versions": ", ".join(region.driver_versions) if region.driver_versions else "", } ) diff --git a/src/together/lib/cli/api/beta/jig/_utils.py b/src/together/lib/cli/api/beta/jig/_utils.py new file mode 100644 index 00000000..60a13d61 --- /dev/null +++ b/src/together/lib/cli/api/beta/jig/_utils.py @@ -0,0 +1,107 @@ +"""Utility functions for jig CLI commands.""" + +from __future__ import annotations + +from datetime import datetime + +from together.types.beta.deployment import Deployment + + +def _format_timestamp(timestamp_str: str | None) -> str: + """Format ISO timestamp for display""" + if not timestamp_str: + return "-" + try: + ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")) + return ts.strftime("%Y-%m-%d %H:%M:%S") + except (ValueError, TypeError): + return timestamp_str or "-" + + +def _image_tag(image: str | None) -> str: + if image is None: + return "unknown" + tag = image.rsplit(":", 1)[-1] if ":" in image else image + if "@sha256:" in image: + tag = f"sha256:{tag[:8]}" + + return tag + + +def format_deployment_status(d: Deployment) -> str: + """Format d status for CLI display""" + status = ( + "App:\n" + f" {'Name':<8}: {d.name} ┃ ID: {d.id}\n" + f" {'Image':<8}: {d.image}\n" + f" {'Status':<8}: {d.status}\n" + f" Created : {_format_timestamp(d.created_at)}" + f" ┃ Updated : {_format_timestamp(d.updated_at)}\n" + ) + + if d.autoscaling: + autoscaling_status = ( + f"\n Autoscaling: {d.autoscaling.get('metric', 'N/A')} {d.autoscaling.get('target', 'N/A')}(target)\n" + ) + status += autoscaling_status + + replica_status = ( + "\n" + f" Replicas:\n" + f" {'Min/Max':<16}: {d.min_replicas}/{d.max_replicas}\n" + f" {'Ready/Desired':<16}: {d.ready_replicas}/{d.desired_replicas}\n" + ) + + status += replica_status + + config_status = ( + 
f"\nConfiguration:\n" + f" Port: {d.port}\n" + f" Command: {d.command}\n" + f" Args: {d.args}\n" + f" Health Check Path: {d.health_check_path}\n" + f" Resources: {d.cpu} core CPU ┃ {d.memory}GB Memory ┃ {d.storage}GB Storage \n" + ) + + if d.gpu_count and d.gpu_type: + config_status += f" GPU: {d.gpu_count}x {d.gpu_type}\n" + + if d.volumes: + config_status += f"\n Volumes:\n {'NAME':<28} MOUNT_PATH\n" + for vol in d.volumes: + config_status += f" {vol.name:<28} {vol.mount_path}\n" + + if d.environment_variables: + secrets = [env for env in d.environment_variables if env.value_from_secret] + env_vars = [env for env in d.environment_variables if not env.value_from_secret] + + if secrets: + config_status += f"\n Secrets: {[secret.name for secret in secrets]}\n" + + if env_vars: + config_status += f"\n Environment Variables:\n {'NAME':<40} VALUE\n" + for env in env_vars: + config_status += f" {env.name:<40} {env.value}\n" + + status += config_status + + if d.replica_events: + events_status = "\nReplica Events:\n" + images = set(map(lambda x: x.image or "-", d.replica_events.values())) + for image in reversed(sorted(images)): + events = filter(lambda x: ((x[1].image or "-") == image), d.replica_events.items()) + events_status += f"{_image_tag(image)}:\n" + for replica_id, event in events: + events_status += f" {replica_id}: " + + if event.volume_preload_status and not event.volume_preload_completed_at: + events_status += f"Volume Preloading" + else: + events_status += f"{event.replica_status}" + if event.replica_status == "Running": + events_status += f", ready since {_format_timestamp(event.replica_ready_since)}" + events_status += "\n" + + status += events_status + + return status diff --git a/src/together/lib/cli/api/beta/jig/jig.py b/src/together/lib/cli/api/beta/jig/jig.py index 1b64bfb2..f540ee35 100644 --- a/src/together/lib/cli/api/beta/jig/jig.py +++ b/src/together/lib/cli/api/beta/jig/jig.py @@ -7,6 +7,7 @@ import shlex import shutil import subprocess +from 
enum import Enum from typing import Any, Callable, Optional from pathlib import Path from dataclasses import asdict @@ -17,6 +18,8 @@ from together import Together from together._exceptions import APIStatusError from together.lib.cli.api._utils import handle_api_errors +from together.types.beta.deployment import Deployment +from together.lib.cli.api.beta.jig._utils import format_deployment_status from together.lib.cli.api.beta.jig._config import ( DEBUG, WARMUP_DEST, @@ -293,6 +296,209 @@ def _build_warm_image(base_image: str) -> None: click.echo("\N{CHECK MARK} Final image with cache built") +def _get_current_revision_id(deployment: Any) -> str: + """Extract current revision ID from deployment environment variables.""" + env_vars: list[Any] = deployment.environment_variables or [] + for env_var in env_vars: + if env_var.name == "TOGETHER_DEPLOYMENT_REVISION_ID": + return str(env_var.value) + return "" + + +def _print_replica_failure(event: Any) -> None: + """Print replica failure details.""" + if event.replica_status_reason: + click.echo(f" Reason: {event.replica_status_reason}") + if event.replica_status_message: + click.echo(f" Message: {event.replica_status_message}") + + +def _fetch_and_print_logs(client: Together, deployment_name: str, replica_id: str) -> None: + """Fetch and print logs for a specific replica.""" + click.echo(f"\n--- Logs for {replica_id} ---") + try: + response = client.beta.jig.retrieve_logs(deployment_name, replica_id=replica_id) + if hasattr(response, "lines") and response.lines: + for log_line in response.lines: + click.echo(log_line) + else: + click.echo("No logs available") + except Exception as e: + click.echo(f"Failed to fetch logs: {e}") + click.echo("--- End of logs ---\n") + + +def _is_volume_preload_done(event: Any) -> bool: + """Check if volume preload is complete or not applicable.""" + if not event.volume_preload_status: + return True # No volume preload + return bool(event.volume_preload_completed_at) + + +class 
ReplicaTrackingResult(Enum): + """Result of processing a single replica event.""" + + CONTINUE = "continue" + SUCCESS = "success" + FAILURE = "failure" + + +def _process_replica_event( + replica_id: str, + event: Any, + states: set[str], + replica_ready_wait_start: dict[str, float], + ready_timeout: float, + client: Together, + deployment_name: str, +) -> ReplicaTrackingResult: + """Process a single replica event and return the tracking result. + + Updates `states` and `replica_ready_wait_start` as side effects. + """ + volume_done = _is_volume_preload_done(event) + + # Track volume preload progress + if event.volume_preload_status: + if "volume_preload_started" not in states: + click.echo(f"\N{PACKAGE} [{replica_id}] Preloading volume contents...") + states.add("volume_preload_started") + elif volume_done and "volume_preload_completed" not in states: + click.echo( + f"\N{CHECK MARK} [{replica_id}] Successfully preloaded volume contents. " + "Attaching the volume to the container..." + ) + states.add("volume_preload_completed") + + # Skip terminated replicas + if event.replica_status == "Terminated": + return ReplicaTrackingResult.CONTINUE + + # Check if ready - SUCCESS + if event.replica_status == "Running" and event.replica_ready_since: + click.echo(f"\N{CHECK MARK} [{replica_id}] Container is running and ready") + click.echo("\N{ROCKET} Deployment successful!") + click.echo("Note: Additional replicas may still be scaling up.") + return ReplicaTrackingResult.SUCCESS + + # Check for CrashLoopBackOff + if event.replica_status_reason == "CrashLoopBackOff": + click.echo(f"\N{CROSS MARK} [{replica_id}] Container is crash looping") + _print_replica_failure(event) + _fetch_and_print_logs(client, deployment_name, replica_id) + return ReplicaTrackingResult.FAILURE + + # Check for stuck in Running state without becoming ready + if event.replica_status == "Running" and volume_done: + if replica_id not in replica_ready_wait_start: + replica_ready_wait_start[replica_id] = 
time.time() + + wait_duration = time.time() - replica_ready_wait_start[replica_id] + if wait_duration > ready_timeout: + click.echo( + f"\N{CROSS MARK} [{replica_id}] Container is running but " + f"not ready to serve requests after {ready_timeout} seconds" + ) + _print_replica_failure(event) + _fetch_and_print_logs(client, deployment_name, replica_id) + click.echo(f"Deployment '{deployment_name}' may still be in progress.") + return ReplicaTrackingResult.FAILURE + + # Print status updates deduplicated by status + reason + # Skip all status updates while volume preload is in progress + if volume_done and event.replica_status_reason: + status_key = f"{event.replica_status}_{event.replica_status_reason}" + if status_key not in states: + states.add(status_key) + click.echo( + f"\N{HOURGLASS WITH FLOWING SAND} [{replica_id}] {event.replica_status}: {event.replica_status_reason}" + ) + if event.replica_status_message: + click.echo(f" {event.replica_status_message}") + + return ReplicaTrackingResult.CONTINUE + + +def _track_deployment_progress(deployment_name: str, client: Together) -> Optional[dict[str, Any]]: + """Track deployment progress until ready or failed. 
+ + Polls deployment status every 3 seconds until: + - Success: At least one replica with the latest revision has replica_ready_since set + - Failure: CrashLoopBackOff or Running without ready_since for > 2 minutes + - Timeout: 10 minutes elapsed + """ + poll_interval = 3 # seconds + timeout = 600 # 10 minutes + ready_timeout = 120 # 2 minutes for Running without ready_since + + start_time = time.time() + printed_states: dict[str, set[str]] = {} # replica_id -> set of printed states + replica_ready_wait_start: dict[str, float] = {} # replica_id -> when we started waiting for ready + + click.echo("\N{HOURGLASS WITH FLOWING SAND} Deployment in-progress...") + + try: + while time.time() - start_time < timeout: + deployment = client.beta.jig.retrieve(deployment_name) + + # Handle scale to zero - no replicas expected + if deployment.min_replicas == 0 and deployment.desired_replicas == 0: + if str(deployment.status) == "ScaledToZero": + click.echo("\N{CHECK MARK} Deployment scaled to zero replicas") + return None + # Not yet scaled to zero, wait and retry + time.sleep(poll_interval) + continue + + current_revision_id = _get_current_revision_id(deployment) + + replica_events = deployment.replica_events or {} + + # Filter to replicas with matching revision + relevant_replicas = { + replica_id: event + for replica_id, event in replica_events.items() + if event.revision_id == current_revision_id + } + + if not relevant_replicas: + time.sleep(poll_interval) + continue + + for replica_id, event in relevant_replicas.items(): + if replica_id not in printed_states: + printed_states[replica_id] = set() + + result = _process_replica_event( + replica_id=replica_id, + event=event, + states=printed_states[replica_id], + replica_ready_wait_start=replica_ready_wait_start, + ready_timeout=ready_timeout, + client=client, + deployment_name=deployment_name, + ) + + if result == ReplicaTrackingResult.SUCCESS: + return None + if result == ReplicaTrackingResult.FAILURE: + raise SystemExit(1) + 
+ time.sleep(poll_interval) + + # Timeout reached + click.echo("\N{CROSS MARK} Deployment tracking timed out after 10 minutes") + click.echo(f"Deployment '{deployment_name}' may still be in progress.") + click.echo("Run 'jig status' to check current state.") + raise SystemExit(1) + + except KeyboardInterrupt: + click.echo("\n\N{WARNING SIGN} Deployment tracking interrupted") + click.echo(f"Deployment '{deployment_name}' may still be in progress.") + click.echo("Run 'jig status' to check current state.") + raise SystemExit(130) from None + + # --- CLI Commands --- @@ -359,7 +565,11 @@ def dockerfile(config_path: str | None) -> None: @jig_command @click.option("--tag", default="latest", help="Image tag") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") +@click.option( + "--docker-args", + default=None, + help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)", +) def build( ctx: click.Context, tag: str, @@ -427,13 +637,24 @@ def push(ctx: click.Context, tag: str, config_path: str | None) -> None: @click.option("--tag", default="latest", help="Image tag") @click.option("--build-only", is_flag=True, help="Build and push only") @click.option("--warmup", is_flag=True, help="Run warmup to build torch compile cache") -@click.option("--docker-args", default=None, help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)") -@click.option("--image", "existing_image", default=None, help="Use existing image (skip build/push)") +@click.option( + "--docker-args", + default=None, + help="Extra args for docker build (or use DOCKER_BUILD_EXTRA_ARGS env)", +) +@click.option( + "--image", + "existing_image", + default=None, + help="Use existing image (skip build/push)", +) +@click.option("--detach", "detach", is_flag=True, help="Do not wait for deployment to complete") def deploy( ctx: click.Context, 
tag: str, build_only: bool, warmup: bool, + detach: bool, docker_args: str | None, existing_image: str | None, config_path: str | None, @@ -506,12 +727,12 @@ def deploy( click.echo(json.dumps(deploy_data, indent=2)) click.echo(f"Deploying model: {config.model_name}") - def handle_create() -> dict[str, Any]: + def handle_create() -> Deployment: click.echo("\N{ROCKET} Creating new deployment") try: response = client.beta.jig.deploy(**deploy_data) click.echo(f"\N{CHECK MARK} Deployed: {config.model_name}") - return response.model_dump() + return response except APIStatusError as e: # all errors: # "min replicas cannot be greater than max replicas" @@ -526,30 +747,52 @@ def handle_create() -> dict[str, Any]: # "failed to delete deployment from kubernetes: %w" # errors for toKubernetesEnvironmentVariables, toKubernetesVolumeMounts, getCustomScalers, ReconcileWithKubernetes error_body: Any = getattr(e, "body", None) - error_message = error_body.get("error", "") if isinstance(error_body, dict) else "" # pyright: ignore + error_message = ( # pyright: ignore + error_body.get("error", "") if isinstance(error_body, dict) else "" # pyright: ignore + ) if "already exists" in error_message or "must be unique" in error_message: raise RuntimeError(f"Deployment name must be unique. 
Tip: {config._unique_name_tip}") from None # TODO: helpful tips for more error cases raise try: + existing = client.beta.jig.retrieve(config.model_name) + old_revision_id = _get_current_revision_id(existing) + was_scaled_to_zero = existing.ready_replicas == 0 response = client.beta.jig.update(config.model_name, **deploy_data) - click.echo("\N{CHECK MARK} Updated deployment") + click.echo("\N{CHECK MARK} Applied new deployment configuration") except APIStatusError as e: if hasattr(e, "status_code") and e.status_code == 404: - return handle_create() - raise + old_revision_id = "" + was_scaled_to_zero = False + response = handle_create() + else: + raise - return response.model_dump() + if detach: + return response.model_dump() + + # Skip tracking if revision didn't change and not scaling up from zero + new_revision_id = _get_current_revision_id(response) + scaling_up = was_scaled_to_zero and response.min_replicas and response.min_replicas > 0 + if old_revision_id and old_revision_id == new_revision_id and not scaling_up: + return None + + return _track_deployment_progress(config.model_name, client) @jig_command -def status(ctx: click.Context, config_path: str | None) -> None: +@click.option("--json", "json_output", is_flag=True, help="Output raw JSON") +def status(ctx: click.Context, config_path: str | None, json_output: bool = False) -> None: """Get deployment status""" client: Together = ctx.obj config = Config.find(config_path) - response = client.beta.jig.with_raw_response.retrieve(config.model_name) - click.echo(json.dumps(response.json(), indent=2)) + response = client.beta.jig.retrieve(config.model_name) + + if json_output: + click.echo(response.model_dump_json(indent=2)) + else: + click.echo(format_deployment_status(response)) @jig_command @@ -633,7 +876,7 @@ def submit( return click.echo(f"\nWatching job {submit_response.request_id}...") - last_status = None + last_status: str | None = None while True: try: response = client.beta.jig.queue.retrieve( diff 
--git a/src/together/lib/cli/api/files/list.py b/src/together/lib/cli/api/files/list.py index d9d3eb7b..722749f1 100644 --- a/src/together/lib/cli/api/files/list.py +++ b/src/together/lib/cli/api/files/list.py @@ -26,7 +26,6 @@ def list(ctx: click.Context) -> None: "File ID": i.id, "Size": convert_bytes(float(str(i.bytes))), # convert to string for mypy typing "Created At": convert_unix_timestamp(i.created_at or 0), - "Line Count": i.line_count, } ) table = tabulate(display_list, headers="keys", tablefmt="grid", showindex=True) diff --git a/src/together/lib/cli/api/files/retrieve_content.py b/src/together/lib/cli/api/files/retrieve_content.py index d26a50dc..9eb751f7 100644 --- a/src/together/lib/cli/api/files/retrieve_content.py +++ b/src/together/lib/cli/api/files/retrieve_content.py @@ -1,3 +1,7 @@ +import os +from typing import Union +from pathlib import Path + import click from together import Together @@ -7,19 +11,34 @@ @click.command() @click.pass_context @click.argument("id", type=str, required=True) -@click.option("--output", type=str, default=None, help="Output filename") +@click.option("--output", type=click.Path(file_okay=False, writable=True, dir_okay=True), help="Output filename") +@click.option("--stdout", is_flag=True, default=False, help="Output to stdout") @handle_api_errors("Files") -def retrieve_content(ctx: click.Context, id: str, output: str) -> None: +def retrieve_content(ctx: click.Context, id: str, output: Union[str, None], stdout: bool) -> None: """Retrieve file content and output to file""" client: Together = ctx.obj - response = client.files.content(id=id) + if stdout is True: + response = client.files.content(id=id) + click.echo(response.read().decode("utf-8")) + + elif output is not None: + os.makedirs(os.path.dirname(output), exist_ok=True) + + # If the user specified an output with an extension - that is a file name so write to that + # If they only specified a directory, write to that directory with the file name from our api + 
has_extension = Path(output).suffix != "" + output = output if has_extension else f"{output}/{get_filename(client, id)}" - if output: with open(output, "wb") as f: + response = client.files.content(id=id) f.write(response.read()) click.echo(f"File saved to {output}") else: - click.echo(response.read().decode("utf-8")) + raise click.UsageError("Either --output or --stdout must be specified") + + +def get_filename(client: Together, id: str) -> str: + return client.files.retrieve(id=id).filename diff --git a/src/together/lib/cli/api/fine_tuning/cancel.py b/src/together/lib/cli/api/fine_tuning/cancel.py index cf4dd992..0aa872b0 100644 --- a/src/together/lib/cli/api/fine_tuning/cancel.py +++ b/src/together/lib/cli/api/fine_tuning/cancel.py @@ -6,6 +6,8 @@ from together.lib.cli.api._utils import handle_api_errors from together.lib.utils.serializer import datetime_serializer +NON_CANCELLABLE_STATES = ["cancel_requested", "cancelled", "error", "completed", "user_error"] + @click.command() @click.pass_context @@ -15,6 +17,15 @@ def cancel(ctx: click.Context, fine_tune_id: str, quiet: bool = False) -> None: """Cancel fine-tuning job""" client: Together = ctx.obj + job = client.fine_tuning.retrieve(fine_tune_id) + if job.status in NON_CANCELLABLE_STATES: + click.echo( + click.style(f"Fine-tuning: ", fg="blue") + + f"Training is not currently cancellable. Current status is " + + click.style(job.status, fg="yellow") + ) + return + if not quiet: confirm_response = input( "You will be billed for any completed training steps upon cancellation. 
" diff --git a/src/together/lib/cli/api/fine_tuning/create.py b/src/together/lib/cli/api/fine_tuning/create.py index 132ee72b..4ec8804b 100644 --- a/src/together/lib/cli/api/fine_tuning/create.py +++ b/src/together/lib/cli/api/fine_tuning/create.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import Any, Literal +from pathlib import Path import click from rich import print as rprint @@ -37,11 +38,16 @@ "-t", type=str, required=True, - help="Training file ID from Files API", + help="Training file ID from Files API or local path to a file to be uploaded.", ) @click.option("--model", "-m", type=str, help="Base model name") @click.option("--n-epochs", "-ne", type=int, default=1, help="Number of epochs to train for") -@click.option("--validation-file", type=str, default="", help="Validation file ID from Files API") +@click.option( + "--validation-file", + type=str, + default="", + help="Validation file ID from Files API or local path to a file to be uploaded.", +) @click.option("--n-evals", type=int, default=0, help="Number of evaluation loops") @click.option("--n-checkpoints", "-c", type=int, default=1, help="Number of checkpoints to save") @click.option("--batch-size", "-b", type=INT_WITH_MAX, default="max", help="Train batch size") @@ -355,9 +361,25 @@ def create( # Don't show price estimation for multimodal models yet confirm = True + # If the user passes a path to a file, try to upload it to the files API first + # Uploads are idompotent so we can depend on this API always giving us a file ID + if _check_path_exists(training_args["training_file"]): + file_upload = client.files.upload(Path(training_args["training_file"]), purpose="fine-tune") + + # Update the local variables to the uploaded file ID. 
+ training_args["training_file"] = file_upload.id + + # If the user passes a path to a file, try to upload it to the files API first + # Uploads are idempotent so we can depend on this API always giving us a file ID + if _check_path_exists(training_args["validation_file"]): + file_upload = client.files.upload(Path(training_args["validation_file"]), purpose="fine-tune") + + # Update the local variables to the uploaded file ID. + training_args["validation_file"] = file_upload.id + finetune_price_estimation_result = client.fine_tuning.estimate_price( - training_file=training_file, - validation_file=validation_file, + training_file=training_args["training_file"], + validation_file=training_args["validation_file"], model=model or "", from_checkpoint=from_checkpoint or "", n_epochs=n_epochs, @@ -392,3 +414,17 @@ def create( rprint(report_string) else: click.echo("No confirmation received, stopping job launch") + + +def _check_path_exists(path_string: str) -> bool: + # Empty string is not considered a path. 
+ if path_string == "": + return False + + my_path = Path(path_string) + if my_path.exists(): + if my_path.is_file(): + return True + elif my_path.is_dir(): + return True + return False diff --git a/src/together/lib/cli/api/fine_tuning/list.py b/src/together/lib/cli/api/fine_tuning/list.py index 53606391..c1b5f96e 100644 --- a/src/together/lib/cli/api/fine_tuning/list.py +++ b/src/together/lib/cli/api/fine_tuning/list.py @@ -8,17 +8,25 @@ from together import Together from together.lib.utils import finetune_price_to_dollars from together.lib.cli.api._utils import handle_api_errors, generate_progress_bar +from together.lib.utils.serializer import datetime_serializer @click.command() @click.pass_context +@click.option("--json", is_flag=True, help="Print output in JSON format") @handle_api_errors("Fine-tuning") -def list(ctx: click.Context) -> None: +def list(ctx: click.Context, json: bool) -> None: """List fine-tuning jobs""" client: Together = ctx.obj response = client.fine_tuning.list() + if json: + from json import dumps + + click.echo(dumps(response.model_dump(exclude_none=True), indent=2, default=datetime_serializer)) + return + response.data = response.data or [] # Use a default datetime for None values to make sure the key function always returns a comparable value diff --git a/src/together/lib/cli/api/models/list.py b/src/together/lib/cli/api/models/list.py index 8ea206a7..e109d998 100644 --- a/src/together/lib/cli/api/models/list.py +++ b/src/together/lib/cli/api/models/list.py @@ -36,8 +36,8 @@ def list(ctx: click.Context, type: Optional[str], json: bool) -> None: display_list: List[Dict[str, Any]] = [] - # If the server has a bug and returns an empty .type this will crash if we don't do the or "". - for model in sorted(models_list, key=lambda x: x.type or ""): # type: ignore + # If the server has a bug and returns an empty .type this will crash if we don't do the or "". 
+ for model in sorted(models_list, key=lambda x: x.type or ""): # type: ignore price_parts: List[str] = [] # Only show pricing if a value actually exists diff --git a/src/together/lib/resources/files.py b/src/together/lib/resources/files.py index 20d5be12..e77bb716 100644 --- a/src/together/lib/resources/files.py +++ b/src/together/lib/resources/files.py @@ -7,6 +7,7 @@ import uuid import shutil import asyncio +import hashlib import logging import tempfile from typing import IO, Any, Dict, List, Tuple, cast @@ -287,6 +288,7 @@ def get_upload_url( self, url: str, file: Path, + checksum: str, purpose: FilePurpose, filetype: FileType, ) -> Tuple[str, str]: @@ -294,6 +296,7 @@ def get_upload_url( "purpose": purpose, "file_name": file.name, "file_type": filetype, + "checksum": checksum, } try: @@ -304,6 +307,8 @@ def get_upload_url( options={"headers": {"Content-Type": "multipart/form-data"}, "follow_redirects": False}, ) except APIStatusError as e: + if e.response.status_code == 409: + raise FileAlreadyExistsError(e.response.json()["file_id"]) from e if e.response.status_code == 401: raise AuthenticationError( "This job would exceed your free trial credits. " @@ -354,16 +359,19 @@ def upload( f"File size {file_size_gb:.1f}GB exceeds maximum supported size of {MAX_FILE_SIZE_GB}GB" ) + checksum = _calculate_file_checksum(file) + if file_size_gb > MULTIPART_THRESHOLD_GB: multipart_manager = MultipartUploadManager(self._client) - return multipart_manager.upload(url, file, purpose) + return multipart_manager.upload(url, file, checksum, purpose) else: - return self._upload_single_file(url, file, purpose) + return self._upload_single_file(url, file, checksum, purpose) def _upload_single_file( self, url: str, file: Path, + checksum: str, purpose: FilePurpose, ) -> FileResponse: file_id = None @@ -377,7 +385,7 @@ def _upload_single_file( raise FileTypeError( f"Unknown extension of file {file}. Only files with extensions .jsonl and .parquet are supported." 
) - redirect_url, file_id = self.get_upload_url(url, file, purpose, filetype) # type: ignore + redirect_url, file_id = self.get_upload_url(url, file, checksum, purpose, filetype) # type: ignore file_size = os.stat(file.as_posix()).st_size @@ -432,6 +440,7 @@ def upload( self, url: str, file: Path, + checksum: str, purpose: FilePurpose, ) -> FileResponse: """Upload large file using multipart upload""" @@ -449,7 +458,7 @@ def upload( upload_info = None try: - upload_info = self._initiate_upload(url, file, file_size, num_parts, purpose, file_type) + upload_info = self._initiate_upload(url, file, checksum, file_size, num_parts, purpose, file_type) completed_parts = self._upload_parts_concurrent(file, upload_info, part_size) @@ -460,6 +469,10 @@ def upload( return self._complete_upload(url, upload_id, file_id, completed_parts) + # If the server says the file already exists, raise the error to the files.upload resource + # This should be silently handled by fetching down the file and returning it + except FileAlreadyExistsError as e: + raise e except Exception as e: if upload_info is not None: upload_id = upload_info.get("upload_id") @@ -485,6 +498,7 @@ def _initiate_upload( self, url: str, file: Path, + checksum: str, file_size: int, num_parts: int, purpose: FilePurpose, @@ -498,6 +512,7 @@ def _initiate_upload( "num_parts": num_parts, "purpose": str(purpose), "file_type": file_type, + "checksum": checksum, } try: @@ -508,6 +523,8 @@ def _initiate_upload( options={"headers": {"Content-Type": "application/json"}}, ) except APIStatusError as e: + if e.response.status_code == 409: + raise FileAlreadyExistsError(e.response.json()["file_id"]) from e if e.response.status_code == 400: response = e.response else: @@ -664,6 +681,7 @@ async def get_upload_url( self, url: str, file: Path, + checksum: str, purpose: FilePurpose, filetype: FileType, ) -> Tuple[str, str]: @@ -671,6 +689,7 @@ async def get_upload_url( "purpose": str(purpose), "file_name": file.name, "file_type": 
filetype, + "checksum": checksum, } try: @@ -681,6 +700,8 @@ async def get_upload_url( options={"headers": {"Content-Type": "multipart/form-data"}, "follow_redirects": False}, ) except APIStatusError as e: + if e.response.status_code == 409: + raise FileAlreadyExistsError(e.response.json()["file_id"]) from e if e.response.status_code == 401: raise AuthenticationError( "This job would exceed your free trial credits. " @@ -735,16 +756,19 @@ async def upload( f"File size {file_size_gb:.1f}GB exceeds maximum supported size of {MAX_FILE_SIZE_GB}GB" ) + checksum = _calculate_file_checksum(file) + if file_size_gb > MULTIPART_THRESHOLD_GB: multipart_manager = AsyncMultipartUploadManager(self._client) - return await multipart_manager.upload(url, file, purpose) + return await multipart_manager.upload(url, file, checksum, purpose) else: - return await self._upload_single_file(url, file, purpose) + return await self._upload_single_file(url, file, checksum, purpose) async def _upload_single_file( self, url: str, file: Path, + checksum: str, purpose: FilePurpose, ) -> FileResponse: file_id = None @@ -758,7 +782,8 @@ async def _upload_single_file( raise FileTypeError( f"Unknown extension of file {file}. Only files with extensions .jsonl and .parquet are supported." 
) - redirect_url, file_id = await self.get_upload_url(url, file, purpose, filetype) # type: ignore + + redirect_url, file_id = await self.get_upload_url(url, file, checksum, purpose, filetype) # type: ignore file_size = os.stat(file.as_posix()).st_size @@ -813,6 +838,7 @@ async def upload( self, url: str, file: Path, + checksum: str, purpose: FilePurpose, ) -> FileResponse: """Upload large file using multipart upload via ThreadPoolExecutor""" @@ -830,7 +856,7 @@ async def upload( upload_info = None try: - upload_info = await self._initiate_upload(url, file, file_size, num_parts, purpose, file_type) + upload_info = await self._initiate_upload(url, file, checksum, file_size, num_parts, purpose, file_type) completed_parts = await self._upload_parts_concurrent(file, upload_info, part_size) @@ -841,6 +867,10 @@ async def upload( return await self._complete_upload(url, upload_id, file_id, completed_parts) + # If the server says the file already exists, raise the error to the files.upload resource + # This should be silently handled by fetching down the file and returning it + except FileAlreadyExistsError as e: + raise e except Exception as e: if upload_info is not None: upload_id = upload_info.get("upload_id") @@ -866,6 +896,7 @@ async def _initiate_upload( self, url: str, file: Path, + checksum: str, file_size: int, num_parts: int, purpose: FilePurpose, @@ -879,6 +910,7 @@ async def _initiate_upload( "num_parts": num_parts, "purpose": str(purpose), "file_type": file_type, + "checksum": checksum, } try: @@ -889,6 +921,8 @@ async def _initiate_upload( options={"headers": {"Content-Type": "application/json"}}, ) except APIStatusError as e: + if e.response.status_code == 409: + raise FileAlreadyExistsError(e.response.json()["file_id"]) from e if e.response.status_code == 400: response = e.response else: @@ -1056,3 +1090,37 @@ def _calculate_parts(file_size: int) -> Tuple[int, int]: num_parts = math.ceil(file_size / part_size) return part_size, num_parts + + +def 
_calculate_file_checksum(file_path: Path, algorithm: str = "sha256", block_size: int = 65536) -> str: + """ + Calculates the checksum of a file using a specified hashing algorithm. + + Args: + file_path (str or Path): The path to the file. + algorithm (str): The name of the hashing algorithm (e.g., 'md5', 'sha256'). + block_size (int): The size of chunks to read the file in (for large files). + + Returns: + str: The hexadecimal representation of the file checksum. + """ + # Create a hash object with the specified algorithm name + try: + hasher = hashlib.new(algorithm) + except ValueError: + return f"Error: Invalid algorithm name '{algorithm}'" + + # Open the file in binary read mode + with open(file_path, "rb") as f: + # Read the file in chunks and update the hash object + for chunk in iter(lambda: f.read(block_size), b""): + hasher.update(chunk) + + # Return the hexadecimal digest of the hash + return hasher.hexdigest() + + +class FileAlreadyExistsError(Exception): + def __init__(self, file_id: str): + self.file_id = file_id + super().__init__(f"File already exists: {file_id}") diff --git a/src/together/resources/batches.py b/src/together/resources/batches.py index ff9e7a29..cb906759 100644 --- a/src/together/resources/batches.py +++ b/src/together/resources/batches.py @@ -113,6 +113,8 @@ def retrieve( Get details of a batch job by ID Args: + id: The ID of the batch job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -165,6 +167,8 @@ def cancel( Cancel a batch job by ID Args: + id: The ID of the batch job to cancel + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -274,6 +278,8 @@ async def retrieve( Get details of a batch job by ID Args: + id: The ID of the batch job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -326,6 +332,8 @@ async def cancel( Cancel a batch job by ID Args: + id: The ID 
of the batch job to cancel + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/beta/clusters/clusters.py b/src/together/resources/beta/clusters/clusters.py index aa0ce95d..6f5eebcc 100644 --- a/src/together/resources/beta/clusters/clusters.py +++ b/src/together/resources/beta/clusters/clusters.py @@ -66,7 +66,7 @@ def create( driver_version: Literal["CUDA_12_5_555", "CUDA_12_6_560", "CUDA_12_6_565", "CUDA_12_8_570"], gpu_type: Literal["H100_SXM", "H200_SXM", "RTX_6000_PCI", "L40_PCIE", "B200_SXM", "H100_SXM_INF"], num_gpus: int, - region: Literal["us-central-8", "us-central-4"], + region: str, cluster_type: Literal["KUBERNETES", "SLURM"] | Omit = omit, duration_days: int | Omit = omit, shared_volume: cluster_create_params.SharedVolume | Omit = omit, @@ -87,6 +87,10 @@ def create( management. Args: + billing_type: RESERVED billing types allow you to specify the duration of the cluster + reservation via the duration_days field. ON_DEMAND billing types will give you + ownership of the cluster until you delete it. + cluster_name: Name of the GPU cluster. driver_version: NVIDIA driver version to use in the cluster. @@ -96,11 +100,17 @@ def create( num_gpus: Number of GPUs to allocate in the cluster. This must be multiple of 8. For example, 8, 16 or 24 - region: Region to create the GPU cluster in. Valid values are us-central-8 and - us-central-4. + region: Region to create the GPU cluster in. Usable regions can be found from + `client.clusters.list_regions()` + + cluster_type: Type of cluster to create. duration_days: Duration in days to keep the cluster running. + shared_volume: Inline configuration to create a shared volume with the cluster creation. + + volume_id: ID of an existing volume to use with the cluster creation. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -147,6 +157,8 @@ def retrieve( Retrieve information about a specific GPU cluster. Args: + cluster_id: The ID of the cluster to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -182,6 +194,13 @@ def update( Update the configuration of an existing GPU cluster. Args: + cluster_id: The ID of the cluster to update + + cluster_type: Type of cluster to update. + + num_gpus: Number of GPUs to allocate in the cluster. This must be multiple of 8. For + example, 8, 16 or 24 + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -241,6 +260,8 @@ def delete( Delete a GPU cluster by cluster ID. Args: + cluster_id: The ID of the cluster to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -311,7 +332,7 @@ async def create( driver_version: Literal["CUDA_12_5_555", "CUDA_12_6_560", "CUDA_12_6_565", "CUDA_12_8_570"], gpu_type: Literal["H100_SXM", "H200_SXM", "RTX_6000_PCI", "L40_PCIE", "B200_SXM", "H100_SXM_INF"], num_gpus: int, - region: Literal["us-central-8", "us-central-4"], + region: str, cluster_type: Literal["KUBERNETES", "SLURM"] | Omit = omit, duration_days: int | Omit = omit, shared_volume: cluster_create_params.SharedVolume | Omit = omit, @@ -332,6 +353,10 @@ async def create( management. Args: + billing_type: RESERVED billing types allow you to specify the duration of the cluster + reservation via the duration_days field. ON_DEMAND billing types will give you + ownership of the cluster until you delete it. + cluster_name: Name of the GPU cluster. driver_version: NVIDIA driver version to use in the cluster. @@ -341,11 +366,17 @@ async def create( num_gpus: Number of GPUs to allocate in the cluster. This must be multiple of 8. For example, 8, 16 or 24 - region: Region to create the GPU cluster in. 
Valid values are us-central-8 and - us-central-4. + region: Region to create the GPU cluster in. Usable regions can be found from + `client.clusters.list_regions()` + + cluster_type: Type of cluster to create. duration_days: Duration in days to keep the cluster running. + shared_volume: Inline configuration to create a shared volume with the cluster creation. + + volume_id: ID of an existing volume to use with the cluster creation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -392,6 +423,8 @@ async def retrieve( Retrieve information about a specific GPU cluster. Args: + cluster_id: The ID of the cluster to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -427,6 +460,13 @@ async def update( Update the configuration of an existing GPU cluster. Args: + cluster_id: The ID of the cluster to update + + cluster_type: Type of cluster to update. + + num_gpus: Number of GPUs to allocate in the cluster. This must be multiple of 8. For + example, 8, 16 or 24 + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -486,6 +526,8 @@ async def delete( Delete a GPU cluster by cluster ID. Args: + cluster_id: The ID of the cluster to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/beta/clusters/storage.py b/src/together/resources/beta/clusters/storage.py index bf0e48ad..0d6260e0 100644 --- a/src/together/resources/beta/clusters/storage.py +++ b/src/together/resources/beta/clusters/storage.py @@ -68,6 +68,8 @@ def create( size_tib: Volume size in whole tebibytes (TiB). + volume_name: Customizable name of the volume to create. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -107,6 +109,8 @@ def retrieve( Retrieve information about a specific shared volume. 
Args: + volume_id: The ID of the volume to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -141,6 +145,10 @@ def update( Update the configuration of an existing shared volume. Args: + size_tib: Size of the volume in whole tebibytes (TiB). + + volume_id: ID of the volume to update. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -200,6 +208,8 @@ def delete( deleting will fail. Args: + volume_id: The ID of the volume to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -264,6 +274,8 @@ async def create( size_tib: Volume size in whole tebibytes (TiB). + volume_name: Customizable name of the volume to create. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -303,6 +315,8 @@ async def retrieve( Retrieve information about a specific shared volume. Args: + volume_id: The ID of the volume to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -337,6 +351,10 @@ async def update( Update the configuration of an existing shared volume. Args: + size_tib: Size of the volume in whole tebibytes (TiB). + + volume_id: ID of the volume to update. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -396,6 +414,8 @@ async def delete( deleting will fail. 
Args: + volume_id: The ID of the volume to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/beta/jig/jig.py b/src/together/resources/beta/jig/jig.py index 13d3698f..102d0761 100644 --- a/src/together/resources/beta/jig/jig.py +++ b/src/together/resources/beta/jig/jig.py @@ -97,6 +97,8 @@ def retrieve( Retrieve details of a specific deployment by its ID or name Args: + id: Deployment ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -148,6 +150,8 @@ def update( Update an existing deployment configuration Args: + id: Deployment ID or name + args: Args overrides the container's CMD. Provide as an array of arguments (e.g., ["python", "app.py"]) @@ -395,6 +399,8 @@ def destroy( Delete an existing deployment Args: + id: Deployment ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -429,6 +435,8 @@ def retrieve_logs( Retrieve logs from a deployment, optionally filtered by replica ID. Args: + id: Deployment ID or name + replica_id: Replica ID to filter logs extra_headers: Send extra headers @@ -501,6 +509,8 @@ async def retrieve( Retrieve details of a specific deployment by its ID or name Args: + id: Deployment ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -552,6 +562,8 @@ async def update( Update an existing deployment configuration Args: + id: Deployment ID or name + args: Args overrides the container's CMD. Provide as an array of arguments (e.g., ["python", "app.py"]) @@ -799,6 +811,8 @@ async def destroy( Delete an existing deployment Args: + id: Deployment ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -833,6 +847,8 @@ async def retrieve_logs( Retrieve logs from a deployment, optionally filtered by replica ID. 
Args: + id: Deployment ID or name + replica_id: Replica ID to filter logs extra_headers: Send extra headers diff --git a/src/together/resources/beta/jig/secrets.py b/src/together/resources/beta/jig/secrets.py index 395a8222..f1b874ca 100644 --- a/src/together/resources/beta/jig/secrets.py +++ b/src/together/resources/beta/jig/secrets.py @@ -113,6 +113,8 @@ def retrieve( Retrieve details of a specific secret by its ID or name Args: + id: Secret ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -150,6 +152,8 @@ def update( Update an existing secret's value or metadata Args: + id: Secret ID or name + description: Description is an optional human-readable description of the secret's purpose (max 500 characters) @@ -224,6 +228,8 @@ def delete( Delete an existing secret Args: + id: Secret ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -334,6 +340,8 @@ async def retrieve( Retrieve details of a specific secret by its ID or name Args: + id: Secret ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -371,6 +379,8 @@ async def update( Update an existing secret's value or metadata Args: + id: Secret ID or name + description: Description is an optional human-readable description of the secret's purpose (max 500 characters) @@ -445,6 +455,8 @@ async def delete( Delete an existing secret Args: + id: Secret ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/beta/jig/volumes.py b/src/together/resources/beta/jig/volumes.py index c2553657..6df1367c 100644 --- a/src/together/resources/beta/jig/volumes.py +++ b/src/together/resources/beta/jig/volumes.py @@ -106,6 +106,8 @@ def retrieve( Retrieve details of a specific volume by its ID or name Args: + id: Volume ID or name + extra_headers: Send extra headers 
extra_query: Add additional query parameters to the request @@ -142,6 +144,8 @@ def update( Update an existing volume's configuration or contents Args: + id: Volume ID or name. + content: Content specifies the new content that will be preloaded to this volume name: Name is the new unique identifier for the volume within the project @@ -208,6 +212,8 @@ def delete( Delete an existing volume Args: + id: Volume ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -309,6 +315,8 @@ async def retrieve( Retrieve details of a specific volume by its ID or name Args: + id: Volume ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -345,6 +353,8 @@ async def update( Update an existing volume's configuration or contents Args: + id: Volume ID or name. + content: Content specifies the new content that will be preloaded to this volume name: Name is the new unique identifier for the volume within the project @@ -411,6 +421,8 @@ async def delete( Delete an existing volume Args: + id: Volume ID or name + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index 705bcf36..c6a02091 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -94,8 +94,10 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ChatCompletion: - """ - Query a chat model. + """Generate a model response for a given chat conversation. + + Supports single queries + and multi-turn conversations with system, user, and assistant messages. Args: messages: A list of messages comprising the conversation so far. 
@@ -104,6 +106,8 @@ def create( [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) + chat_template_kwargs: Additional configuration to pass to model engine. + context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. When set to 'truncate', override the max_tokens with maximum @@ -130,6 +134,9 @@ def create( presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + reasoning: For models that support toggling reasoning functionality, this object can be + used to control that functionality. + reasoning_effort: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. @@ -242,8 +249,10 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[ChatCompletionChunk]: - """ - Query a chat model. + """Generate a model response for a given chat conversation. + + Supports single queries + and multi-turn conversations with system, user, and assistant messages. Args: messages: A list of messages comprising the conversation so far. @@ -256,6 +265,8 @@ def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. + chat_template_kwargs: Additional configuration to pass to model engine. + context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. 
When set to 'truncate', override the max_tokens with maximum @@ -282,6 +293,9 @@ def create( presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + reasoning: For models that support toggling reasoning functionality, this object can be + used to control that functionality. + reasoning_effort: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. @@ -390,8 +404,10 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ChatCompletion | Stream[ChatCompletionChunk]: - """ - Query a chat model. + """Generate a model response for a given chat conversation. + + Supports single queries + and multi-turn conversations with system, user, and assistant messages. Args: messages: A list of messages comprising the conversation so far. @@ -404,6 +420,8 @@ def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. + chat_template_kwargs: Additional configuration to pass to model engine. + context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. When set to 'truncate', override the max_tokens with maximum @@ -430,6 +448,9 @@ def create( presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + reasoning: For models that support toggling reasoning functionality, this object can be + used to control that functionality. + reasoning_effort: Controls the level of reasoning effort the model should apply when generating responses. 
Higher values may result in more thoughtful and detailed responses but may take longer to generate. @@ -650,8 +671,10 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ChatCompletion: - """ - Query a chat model. + """Generate a model response for a given chat conversation. + + Supports single queries + and multi-turn conversations with system, user, and assistant messages. Args: messages: A list of messages comprising the conversation so far. @@ -660,6 +683,8 @@ async def create( [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) + chat_template_kwargs: Additional configuration to pass to model engine. + context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. When set to 'truncate', override the max_tokens with maximum @@ -686,6 +711,9 @@ async def create( presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + reasoning: For models that support toggling reasoning functionality, this object can be + used to control that functionality. + reasoning_effort: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. @@ -798,8 +826,10 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[ChatCompletionChunk]: - """ - Query a chat model. + """Generate a model response for a given chat conversation. + + Supports single queries + and multi-turn conversations with system, user, and assistant messages. Args: messages: A list of messages comprising the conversation so far. 
@@ -812,6 +842,8 @@ async def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. + chat_template_kwargs: Additional configuration to pass to model engine. + context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. When set to 'truncate', override the max_tokens with maximum @@ -838,6 +870,9 @@ async def create( presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + reasoning: For models that support toggling reasoning functionality, this object can be + used to control that functionality. + reasoning_effort: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. @@ -946,8 +981,10 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - """ - Query a chat model. + """Generate a model response for a given chat conversation. + + Supports single queries + and multi-turn conversations with system, user, and assistant messages. Args: messages: A list of messages comprising the conversation so far. @@ -960,6 +997,8 @@ async def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. + chat_template_kwargs: Additional configuration to pass to model engine. + context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. 
When set to 'truncate', override the max_tokens with maximum @@ -986,6 +1025,9 @@ async def create( presence_penalty: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + reasoning: For models that support toggling reasoning functionality, this object can be + used to control that functionality. + reasoning_effort: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index c6a2fafb..7fb330b5 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -84,7 +84,8 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion: """ - Query a language, code, or image model. + Generate text completions for a given prompt using a language, code, or image + model. Args: model: The name of the model to query. @@ -196,7 +197,8 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Stream[CompletionChunk]: """ - Query a language, code, or image model. + Generate text completions for a given prompt using a language, code, or image + model. Args: model: The name of the model to query. @@ -308,7 +310,8 @@ def create( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion | Stream[CompletionChunk]: """ - Query a language, code, or image model. + Generate text completions for a given prompt using a language, code, or image + model. Args: model: The name of the model to query. @@ -513,7 +516,8 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion: """ - Query a language, code, or image model. + Generate text completions for a given prompt using a language, code, or image + model. Args: model: The name of the model to query. 
@@ -625,7 +629,8 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> AsyncStream[CompletionChunk]: """ - Query a language, code, or image model. + Generate text completions for a given prompt using a language, code, or image + model. Args: model: The name of the model to query. @@ -737,7 +742,8 @@ async def create( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Completion | AsyncStream[CompletionChunk]: """ - Query a language, code, or image model. + Generate text completions for a given prompt using a language, code, or image + model. Args: model: The name of the model to query. diff --git a/src/together/resources/embeddings.py b/src/together/resources/embeddings.py index 88136bda..dff27576 100644 --- a/src/together/resources/embeddings.py +++ b/src/together/resources/embeddings.py @@ -64,8 +64,10 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Embedding: - """ - Query an embedding model for a given string of text. + """Generate vector embeddings for one or more text inputs. + + Returns numerical arrays + representing semantic meaning, useful for search, classification, and retrieval. Args: input: A string providing the text for the model to embed. @@ -138,8 +140,10 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> Embedding: - """ - Query an embedding model for a given string of text. + """Generate vector embeddings for one or more text inputs. + + Returns numerical arrays + representing semantic meaning, useful for search, classification, and retrieval. Args: input: A string providing the text for the model to embed. 
diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py index 372e5120..8330da21 100644 --- a/src/together/resources/endpoints.py +++ b/src/together/resources/endpoints.py @@ -145,6 +145,8 @@ def retrieve( configuration, and scaling settings. Args: + endpoint_id: The ID of the endpoint to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -184,6 +186,8 @@ def update( autoscaling settings, or change the endpoint's state (start/stop). Args: + endpoint_id: The ID of the endpoint to update + autoscaling: New autoscaling configuration for the endpoint display_name: A human-readable name for the endpoint @@ -288,6 +292,8 @@ def delete( This action cannot be undone. Args: + endpoint_id: The ID of the endpoint to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -480,6 +486,8 @@ async def retrieve( configuration, and scaling settings. Args: + endpoint_id: The ID of the endpoint to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -519,6 +527,8 @@ async def update( autoscaling settings, or change the endpoint's state (start/stop). Args: + endpoint_id: The ID of the endpoint to update + autoscaling: New autoscaling configuration for the endpoint display_name: A human-readable name for the endpoint @@ -623,6 +633,8 @@ async def delete( This action cannot be undone. 
Args: + endpoint_id: The ID of the endpoint to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/evals.py b/src/together/resources/evals.py index 4870c401..b3ebd563 100644 --- a/src/together/resources/evals.py +++ b/src/together/resources/evals.py @@ -104,6 +104,8 @@ def retrieve( Get evaluation job details Args: + id: The ID of the evaluation job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -127,7 +129,6 @@ def list( *, limit: int | Omit = omit, status: str | Omit = omit, - user_id: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -139,8 +140,9 @@ def list( Get all evaluation jobs Args: - user_id: Admin users can specify a user ID to filter jobs. Pass empty string to get all - jobs. 
+ limit: Limit the number of results + + status: Filter evaluation jobs by status extra_headers: Send extra headers @@ -161,7 +163,6 @@ def list( { "limit": limit, "status": status, - "user_id": user_id, }, eval_list_params.EvalListParams, ), @@ -184,6 +185,8 @@ def status( Get evaluation job status and results Args: + id: The ID of the evaluation job to get the status of + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -281,6 +284,8 @@ async def retrieve( Get evaluation job details Args: + id: The ID of the evaluation job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -304,7 +309,6 @@ async def list( *, limit: int | Omit = omit, status: str | Omit = omit, - user_id: str | Omit = omit, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -316,8 +320,9 @@ async def list( Get all evaluation jobs Args: - user_id: Admin users can specify a user ID to filter jobs. Pass empty string to get all - jobs. 
+ limit: Limit the number of results + + status: Filter evaluation jobs by status extra_headers: Send extra headers @@ -338,7 +343,6 @@ async def list( { "limit": limit, "status": status, - "user_id": user_id, }, eval_list_params.EvalListParams, ), @@ -361,6 +365,8 @@ async def status( Get evaluation job status and results Args: + id: The ID of the evaluation job to get the status of + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/files.py b/src/together/resources/files.py index e8e4cd34..74121dab 100644 --- a/src/together/resources/files.py +++ b/src/together/resources/files.py @@ -9,6 +9,7 @@ import httpx from together.types import FilePurpose +from together.lib.resources.files import FileAlreadyExistsError from ..lib import FileTypeError, UploadManager, AsyncUploadManager, check_file from ..types import FilePurpose @@ -69,9 +70,11 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileResponse: """ - List the metadata for a single uploaded data file. + Retrieve the metadata for a single uploaded data file. Args: + id: The ID of the file to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -124,6 +127,8 @@ def delete( Delete a previously uploaded data file. 
Args: + id: The ID of the file to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -162,20 +167,22 @@ def upload( purpose = cast(FilePurpose, purpose) - upload_manager = UploadManager(self._client) - result = upload_manager.upload("/files", file, purpose) - - return FileResponse( - id=result.id, - bytes=result.bytes, - created_at=result.created_at, - filename=result.filename, - FileType=result.file_type, - LineCount=result.line_count, - object=result.object, - Processed=result.processed, - purpose=result.purpose, - ) + try: + upload_manager = UploadManager(self._client) + result = upload_manager.upload("/files", file, purpose) + + return FileResponse( + id=result.id, + bytes=result.bytes, + created_at=result.created_at, + filename=result.filename, + FileType=result.file_type, + object=result.object, + Processed=result.processed, + purpose=result.purpose, + ) + except FileAlreadyExistsError as e: + return self.retrieve(e.file_id) def content( self, @@ -192,6 +199,8 @@ def content( Get the contents of a single uploaded data file. Args: + id: The ID of the file to get the content of + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -244,9 +253,11 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> FileResponse: """ - List the metadata for a single uploaded data file. + Retrieve the metadata for a single uploaded data file. Args: + id: The ID of the file to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -299,6 +310,8 @@ async def delete( Delete a previously uploaded data file. 
Args: + id: The ID of the file to delete + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -337,20 +350,22 @@ async def upload( purpose = cast(FilePurpose, purpose) - upload_manager = AsyncUploadManager(self._client) - result = await upload_manager.upload("/files", file, purpose) - - return FileResponse( - id=result.id, - bytes=result.bytes, - created_at=result.created_at, - filename=result.filename, - FileType=result.file_type, - LineCount=result.line_count, - object=result.object, - Processed=result.processed, - purpose=result.purpose, - ) + try: + upload_manager = AsyncUploadManager(self._client) + result = await upload_manager.upload("/files", file, purpose) + + return FileResponse( + id=result.id, + bytes=result.bytes, + created_at=result.created_at, + filename=result.filename, + FileType=result.file_type, + object=result.object, + Processed=result.processed, + purpose=result.purpose, + ) + except FileAlreadyExistsError as e: + return await self.retrieve(e.file_id) async def content( self, @@ -367,6 +382,8 @@ async def content( Get the contents of a single uploaded data file. Args: + id: The ID of the file to get the content of + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/fine_tuning.py b/src/together/resources/fine_tuning.py index 5b22bad3..589869d7 100644 --- a/src/together/resources/fine_tuning.py +++ b/src/together/resources/fine_tuning.py @@ -288,6 +288,8 @@ def retrieve( List the metadata for a single fine-tuning job. Args: + id: The ID of the job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -345,6 +347,10 @@ def delete( Delete a fine-tuning job. Args: + id: The ID of the fine-tune job to delete + + force: Deprecated and unused parameter. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -384,6 +390,8 @@ def cancel( object. Args: + id: Fine-tune ID to cancel. A string that starts with `ft-`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -540,6 +548,8 @@ def list_checkpoints( List the checkpoints for a single fine-tuning job. Args: + id: The ID of the fine-tune job to list checkpoints for + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -573,6 +583,8 @@ def list_events( List the events for a single fine-tuning job. Args: + id: The ID of the fine-tune job to list events for + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -821,6 +833,8 @@ async def retrieve( List the metadata for a single fine-tuning job. Args: + id: The ID of the job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -878,6 +892,10 @@ async def delete( Delete a fine-tuning job. Args: + id: The ID of the fine-tune job to delete + + force: Deprecated and unused parameter. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -917,6 +935,8 @@ async def cancel( object. Args: + id: Fine-tune ID to cancel. A string that starts with `ft-`. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1073,6 +1093,8 @@ async def list_checkpoints( List the checkpoints for a single fine-tuning job. Args: + id: The ID of the fine-tune job to list checkpoints for + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -1106,6 +1128,8 @@ async def list_events( List the events for a single fine-tuning job. 
Args: + id: The ID of the fine-tune job to list events for + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/models/uploads.py b/src/together/resources/models/uploads.py index d861f0e0..f0cacfcd 100644 --- a/src/together/resources/models/uploads.py +++ b/src/together/resources/models/uploads.py @@ -54,6 +54,8 @@ def status( Get the status of a specific job Args: + job_id: The ID of the job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -108,6 +110,8 @@ async def status( Get the status of a specific job Args: + job_id: The ID of the job to retrieve + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/resources/rerank.py b/src/together/resources/rerank.py index b8a62735..db663244 100644 --- a/src/together/resources/rerank.py +++ b/src/together/resources/rerank.py @@ -60,8 +60,10 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> RerankCreateResponse: - """ - Query a reranker model + """Rerank a list of documents by relevance to a query. + + Returns a relevance score + and ordering index for each document. Args: documents: List of documents, which can be either strings or objects. @@ -143,8 +145,10 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = not_given, ) -> RerankCreateResponse: - """ - Query a reranker model + """Rerank a list of documents by relevance to a query. + + Returns a relevance score + and ordering index for each document. Args: documents: List of documents, which can be either strings or objects. 
diff --git a/src/together/resources/videos.py b/src/together/resources/videos.py index 76cbe5ec..81a7a78a 100644 --- a/src/together/resources/videos.py +++ b/src/together/resources/videos.py @@ -156,6 +156,8 @@ def retrieve( Fetch video metadata Args: + id: Identifier of video from create response. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -307,6 +309,8 @@ async def retrieve( Fetch video metadata Args: + id: Identifier of video from create response. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/together/types/audio/voice_list_response.py b/src/together/types/audio/voice_list_response.py index e456afdf..fe925f34 100644 --- a/src/together/types/audio/voice_list_response.py +++ b/src/together/types/audio/voice_list_response.py @@ -11,14 +11,17 @@ class DataVoice(BaseModel): id: str name: str + """Voice name to be used for audio inference.""" class Data(BaseModel): """Represents a model with its available voices.""" model: str + """Model name.""" voices: List[DataVoice] + """List of available voices for the model.""" class VoiceListResponse(BaseModel): diff --git a/src/together/types/audio_speech_stream_chunk.py b/src/together/types/audio_speech_stream_chunk.py index 67763287..277c12af 100644 --- a/src/together/types/audio_speech_stream_chunk.py +++ b/src/together/types/audio_speech_stream_chunk.py @@ -14,3 +14,4 @@ class AudioSpeechStreamChunk(BaseModel): model: str object: Literal["audio.tts.chunk"] + """The object type, which is always `audio.tts.chunk`.""" diff --git a/src/together/types/beta/cluster.py b/src/together/types/beta/cluster.py index 156e43fe..4a6bfbed 100644 --- a/src/together/types/beta/cluster.py +++ b/src/together/types/beta/cluster.py @@ -58,6 +58,7 @@ class Cluster(BaseModel): cluster_name: str cluster_type: Literal["KUBERNETES", "SLURM"] + """Type of cluster.""" control_plane_nodes: List[ControlPlaneNode] diff --git 
a/src/together/types/beta/cluster_create_params.py b/src/together/types/beta/cluster_create_params.py index 6f460274..8dbbc95e 100644 --- a/src/together/types/beta/cluster_create_params.py +++ b/src/together/types/beta/cluster_create_params.py @@ -9,6 +9,11 @@ class ClusterCreateParams(TypedDict, total=False): billing_type: Required[Literal["RESERVED", "ON_DEMAND"]] + """ + RESERVED billing types allow you to specify the duration of the cluster + reservation via the duration_days field. ON_DEMAND billing types will give you + ownership of the cluster until you delete it. + """ cluster_name: Required[str] """Name of the GPU cluster.""" @@ -25,23 +30,28 @@ class ClusterCreateParams(TypedDict, total=False): This must be multiple of 8. For example, 8, 16 or 24 """ - region: Required[Literal["us-central-8", "us-central-4"]] + region: Required[str] """Region to create the GPU cluster in. - Valid values are us-central-8 and us-central-4. + Usable regions can be found from `client.clusters.list_regions()` """ cluster_type: Literal["KUBERNETES", "SLURM"] + """Type of cluster to create.""" duration_days: int """Duration in days to keep the cluster running.""" shared_volume: SharedVolume + """Inline configuration to create a shared volume with the cluster creation.""" volume_id: str + """ID of an existing volume to use with the cluster creation.""" class SharedVolume(TypedDict, total=False): + """Inline configuration to create a shared volume with the cluster creation.""" + region: Required[str] """Region name. 
Usable regions can be found from `client.clusters.list_regions()`""" @@ -49,3 +59,4 @@ class SharedVolume(TypedDict, total=False): """Volume size in whole tebibytes (TiB).""" volume_name: Required[str] + """Customizable name of the volume to create.""" diff --git a/src/together/types/beta/cluster_list_regions_response.py b/src/together/types/beta/cluster_list_regions_response.py index 8aedaf3b..3755b494 100644 --- a/src/together/types/beta/cluster_list_regions_response.py +++ b/src/together/types/beta/cluster_list_regions_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import List, Optional from ..._models import BaseModel @@ -8,13 +8,14 @@ class Region(BaseModel): - id: str - - availability_zones: List[str] - driver_versions: List[str] + """List of supported identifiable driver versions available in the region.""" name: str + """Identifiable name of the region.""" + + supported_instance_types: Optional[List[str]] = None + """List of supported identifiable gpus available in the region.""" class ClusterListRegionsResponse(BaseModel): diff --git a/src/together/types/beta/cluster_update_params.py b/src/together/types/beta/cluster_update_params.py index 1394c896..67c3a698 100644 --- a/src/together/types/beta/cluster_update_params.py +++ b/src/together/types/beta/cluster_update_params.py @@ -9,5 +9,10 @@ class ClusterUpdateParams(TypedDict, total=False): cluster_type: Literal["KUBERNETES", "SLURM"] + """Type of cluster to update.""" num_gpus: int + """Number of GPUs to allocate in the cluster. + + This must be multiple of 8. 
For example, 8, 16 or 24 + """ diff --git a/src/together/types/beta/clusters/cluster_storage.py b/src/together/types/beta/clusters/cluster_storage.py index 10687cec..6d7a0bfe 100644 --- a/src/together/types/beta/clusters/cluster_storage.py +++ b/src/together/types/beta/clusters/cluster_storage.py @@ -9,9 +9,13 @@ class ClusterStorage(BaseModel): size_tib: int + """Size of the volume in whole tebibytes (TiB).""" status: Literal["available", "bound", "provisioning"] + """Deployment status of the volume.""" volume_id: str + """ID of the volume.""" volume_name: str + """Provided name of the volume.""" diff --git a/src/together/types/beta/clusters/storage_create_params.py b/src/together/types/beta/clusters/storage_create_params.py index 876e4034..5629cb11 100644 --- a/src/together/types/beta/clusters/storage_create_params.py +++ b/src/together/types/beta/clusters/storage_create_params.py @@ -15,3 +15,4 @@ class StorageCreateParams(TypedDict, total=False): """Volume size in whole tebibytes (TiB).""" volume_name: Required[str] + """Customizable name of the volume to create.""" diff --git a/src/together/types/beta/clusters/storage_update_params.py b/src/together/types/beta/clusters/storage_update_params.py index 754a7baa..449a6266 100644 --- a/src/together/types/beta/clusters/storage_update_params.py +++ b/src/together/types/beta/clusters/storage_update_params.py @@ -9,5 +9,7 @@ class StorageUpdateParams(TypedDict, total=False): size_tib: int + """Size of the volume in whole tebibytes (TiB).""" volume_id: str + """ID of the volume to update.""" diff --git a/src/together/types/beta/deployment.py b/src/together/types/beta/deployment.py index ca693ec8..30d77f04 100644 --- a/src/together/types/beta/deployment.py +++ b/src/together/types/beta/deployment.py @@ -150,8 +150,8 @@ class Deployment(BaseModel): name: Optional[str] = None """Name is the name of the deployment""" - object: Optional[str] = None - """Object is the type identifier for this response (always "deployment")""" 
+ object: Optional[Literal["deployment"]] = None + """The object type, which is always `deployment`.""" port: Optional[int] = None """Port is the container port that the deployment exposes""" diff --git a/src/together/types/beta/jig/secret.py b/src/together/types/beta/jig/secret.py index 41fe8e25..3700ebd8 100644 --- a/src/together/types/beta/jig/secret.py +++ b/src/together/types/beta/jig/secret.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Optional +from typing_extensions import Literal from ...._models import BaseModel @@ -26,8 +27,8 @@ class Secret(BaseModel): name: Optional[str] = None """Name is the name/key of the secret""" - object: Optional[str] = None - """Object is the type identifier for this response (always "secret")""" + object: Optional[Literal["secret"]] = None + """The object type, which is always `secret`.""" updated_at: Optional[str] = None """UpdatedAt is the ISO8601 timestamp when this secret was last updated""" diff --git a/src/together/types/beta/jig/secret_list_response.py b/src/together/types/beta/jig/secret_list_response.py index 89e97adc..757cda11 100644 --- a/src/together/types/beta/jig/secret_list_response.py +++ b/src/together/types/beta/jig/secret_list_response.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Optional +from typing_extensions import Literal from .secret import Secret from ...._models import BaseModel @@ -12,5 +13,5 @@ class SecretListResponse(BaseModel): data: Optional[List[Secret]] = None """Data is the array of secret items""" - object: Optional[str] = None - """Object is the type identifier for this response (always "list")""" + object: Optional[Literal["list"]] = None + """The object type, which is always `list`.""" diff --git a/src/together/types/beta/jig/volume.py b/src/together/types/beta/jig/volume.py index ce6cd171..cdf3b9f7 100644 --- a/src/together/types/beta/jig/volume.py +++ b/src/together/types/beta/jig/volume.py @@ -37,8 +37,8 @@ class Volume(BaseModel): name: Optional[str] = None """Name is the name of the volume""" - object: Optional[str] = None - """Object is the type identifier for this response (always "volume")""" + object: Optional[Literal["volume"]] = None + """The object type, which is always `volume`.""" type: Optional[Literal["readOnly"]] = None """Type is the volume type (e.g., "readOnly")""" diff --git a/src/together/types/beta/jig/volume_list_response.py b/src/together/types/beta/jig/volume_list_response.py index be4923c5..896c8d38 100644 --- a/src/together/types/beta/jig/volume_list_response.py +++ b/src/together/types/beta/jig/volume_list_response.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import List, Optional +from typing_extensions import Literal from .volume import Volume from ...._models import BaseModel @@ -12,5 +13,5 @@ class VolumeListResponse(BaseModel): data: Optional[List[Volume]] = None """Data is the array of volume items""" - object: Optional[str] = None - """Object is the type identifier for this response (always "list")""" + object: Optional[Literal["list"]] = None + """The object type, which is always `list`.""" diff --git a/src/together/types/beta/jig_list_response.py b/src/together/types/beta/jig_list_response.py index 85ff3b24..410946d6 100644 --- a/src/together/types/beta/jig_list_response.py +++ b/src/together/types/beta/jig_list_response.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional +from typing_extensions import Literal from ..._models import BaseModel from .deployment import Deployment @@ -12,5 +13,5 @@ class JigListResponse(BaseModel): data: Optional[List[Deployment]] = None """Data is the array of deployment items""" - object: Optional[str] = None - """Object is the type identifier for this response (always "list")""" + object: Optional[Literal["list"]] = None + """The object type, which is always `list`.""" diff --git a/src/together/types/chat/chat_completion.py b/src/together/types/chat/chat_completion.py index aeda562b..840d4cc5 100644 --- a/src/together/types/chat/chat_completion.py +++ b/src/together/types/chat/chat_completion.py @@ -54,6 +54,7 @@ class ChatCompletion(BaseModel): model: str object: Literal["chat.completion"] + """The object type, which is always `chat.completion`.""" usage: Optional[ChatCompletionUsage] = None diff --git a/src/together/types/chat/chat_completion_chunk.py b/src/together/types/chat/chat_completion_chunk.py index 566f8d5d..eeb26959 100644 --- a/src/together/types/chat/chat_completion_chunk.py +++ b/src/together/types/chat/chat_completion_chunk.py @@ -53,6 +53,7 @@ class 
ChatCompletionChunk(BaseModel): model: str object: Literal["chat.completion.chunk"] + """The object type, which is always `chat.completion.chunk`.""" system_fingerprint: Optional[str] = None diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index e572a0d6..26075cfc 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -62,6 +62,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ chat_template_kwargs: object + """Additional configuration to pass to model engine.""" compliance: Literal["hipaa"] @@ -113,6 +114,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ reasoning: Reasoning + """ + For models that support toggling reasoning functionality, this object can be + used to control that functionality. + """ reasoning_effort: Literal["low", "medium", "high"] """ @@ -318,12 +323,12 @@ class FunctionCallName(TypedDict, total=False): class Reasoning(TypedDict, total=False): - enabled: bool """ - For models that support toggling reasoning functionality, this object can be - used to control that functionality. + For models that support toggling reasoning functionality, this object can be used to control that functionality. """ + enabled: bool + class ResponseFormatText(TypedDict, total=False): """Default response format. 
Used to generate text responses.""" diff --git a/src/together/types/completion.py b/src/together/types/completion.py index dd5e039f..174f316e 100644 --- a/src/together/types/completion.py +++ b/src/together/types/completion.py @@ -36,6 +36,7 @@ class Completion(BaseModel): model: str object: Literal["text.completion"] + """The object type, which is always `text.completion`.""" usage: Optional[ChatCompletionUsage] = None diff --git a/src/together/types/completion_chunk.py b/src/together/types/completion_chunk.py index 51f475c4..a39bd23c 100644 --- a/src/together/types/completion_chunk.py +++ b/src/together/types/completion_chunk.py @@ -62,5 +62,6 @@ class CompletionChunk(BaseModel): created: Optional[int] = None object: Optional[Literal["completion.chunk"]] = None + """The object type, which is always `completion.chunk`.""" seed: Optional[int] = None diff --git a/src/together/types/dedicated_endpoint.py b/src/together/types/dedicated_endpoint.py index 424b01b4..996c433b 100644 --- a/src/together/types/dedicated_endpoint.py +++ b/src/together/types/dedicated_endpoint.py @@ -34,7 +34,7 @@ class DedicatedEndpoint(BaseModel): """System name for the endpoint""" object: Literal["endpoint"] - """The type of object""" + """The object type, which is always `endpoint`.""" owner: str """The owner of this endpoint""" diff --git a/src/together/types/embedding.py b/src/together/types/embedding.py index d2b009ad..b93f523c 100644 --- a/src/together/types/embedding.py +++ b/src/together/types/embedding.py @@ -14,6 +14,7 @@ class Data(BaseModel): index: int object: Literal["embedding"] + """The object type, which is always `embedding`.""" class Embedding(BaseModel): @@ -22,3 +23,4 @@ class Embedding(BaseModel): model: str object: Literal["list"] + """The object type, which is always `list`.""" diff --git a/src/together/types/endpoint_list_hardware_response.py b/src/together/types/endpoint_list_hardware_response.py index 87f08624..03584105 100644 --- 
a/src/together/types/endpoint_list_hardware_response.py +++ b/src/together/types/endpoint_list_hardware_response.py @@ -46,6 +46,7 @@ class Data(BaseModel): """Unique identifier for the hardware configuration""" object: Literal["hardware"] + """The object type, which is always `hardware`.""" pricing: DataPricing """Pricing details for using an endpoint""" @@ -64,3 +65,4 @@ class EndpointListHardwareResponse(BaseModel): data: List[Data] object: Literal["list"] + """The object type, which is always `list`.""" diff --git a/src/together/types/endpoint_list_response.py b/src/together/types/endpoint_list_response.py index 534d84da..27dc70e7 100644 --- a/src/together/types/endpoint_list_response.py +++ b/src/together/types/endpoint_list_response.py @@ -25,7 +25,7 @@ class Data(BaseModel): """System name for the endpoint""" object: Literal["endpoint"] - """The type of object""" + """The object type, which is always `endpoint`.""" owner: str """The owner of this endpoint""" @@ -41,3 +41,4 @@ class EndpointListResponse(BaseModel): data: List[Data] object: Literal["list"] + """The object type, which is always `list`.""" diff --git a/src/together/types/eval_list_params.py b/src/together/types/eval_list_params.py index ad0d8021..87170b11 100644 --- a/src/together/types/eval_list_params.py +++ b/src/together/types/eval_list_params.py @@ -2,20 +2,14 @@ from __future__ import annotations -from typing_extensions import Annotated, TypedDict - -from .._utils import PropertyInfo +from typing_extensions import TypedDict __all__ = ["EvalListParams"] class EvalListParams(TypedDict, total=False): limit: int + """Limit the number of results""" status: str - - user_id: Annotated[str, PropertyInfo(alias="userId")] - """Admin users can specify a user ID to filter jobs. - - Pass empty string to get all jobs. 
- """ + """Filter evaluation jobs by status""" diff --git a/src/together/types/file_response.py b/src/together/types/file_response.py index 0468630f..e8f7931f 100644 --- a/src/together/types/file_response.py +++ b/src/together/types/file_response.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing_extensions import Literal + from pydantic import Field as FieldInfo from .._models import BaseModel @@ -10,22 +12,28 @@ class FileResponse(BaseModel): + """Structured information describing a file uploaded to Together.""" + id: str + """ID of the file.""" bytes: int + """The number of bytes in the file.""" created_at: int + """The timestamp when the file was created.""" filename: str + """The name of the file as it was uploaded.""" file_type: FileType = FieldInfo(alias="FileType") - """The type of the file""" - - line_count: int = FieldInfo(alias="LineCount") + """The type of the file such as `jsonl`, `csv`, or `parquet`.""" - object: str + object: Literal["file"] + """The object type, which is always `file`.""" processed: bool = FieldInfo(alias="Processed") + """Whether the file has been parsed and analyzed for correctness for fine-tuning.""" purpose: FilePurpose - """The purpose of the file""" + """The purpose of the file as it was uploaded.""" diff --git a/src/together/types/fine_tuning_delete_params.py b/src/together/types/fine_tuning_delete_params.py index d5343a86..734158d6 100644 --- a/src/together/types/fine_tuning_delete_params.py +++ b/src/together/types/fine_tuning_delete_params.py @@ -9,3 +9,4 @@ class FineTuningDeleteParams(TypedDict, total=False): force: bool + """Deprecated and unused parameter.""" diff --git a/src/together/types/finetune_event.py b/src/together/types/finetune_event.py index 58234de7..1db1fba4 100644 --- a/src/together/types/finetune_event.py +++ b/src/together/types/finetune_event.py @@ -23,6 +23,7 @@ class FinetuneEvent(BaseModel): x_model_path: str = 
FieldInfo(alias="model_path") object: Literal["fine-tune-event"] + """The object type, which is always `fine-tune-event`.""" param_count: int diff --git a/src/together/types/image_file.py b/src/together/types/image_file.py index cc7949f3..d7579657 100644 --- a/src/together/types/image_file.py +++ b/src/together/types/image_file.py @@ -21,3 +21,4 @@ class ImageFile(BaseModel): model: str object: Literal["list"] + """The object type, which is always `list`.""" diff --git a/src/together/types/model_object.py b/src/together/types/model_object.py index b0c4d432..69cb2266 100644 --- a/src/together/types/model_object.py +++ b/src/together/types/model_object.py @@ -25,7 +25,8 @@ class ModelObject(BaseModel): created: int - object: str + object: Literal["model"] + """The object type, which is always `model`.""" type: Literal["chat", "language", "code", "image", "embedding", "moderation", "rerank"] diff --git a/src/together/types/rerank_create_response.py b/src/together/types/rerank_create_response.py index 8002027e..71e5afe1 100644 --- a/src/together/types/rerank_create_response.py +++ b/src/together/types/rerank_create_response.py @@ -26,7 +26,7 @@ class RerankCreateResponse(BaseModel): """The model to be used for the rerank request.""" object: Literal["rerank"] - """Object type""" + """The object type, which is always `rerank`.""" results: List[Result] diff --git a/tests/api_resources/beta/test_clusters.py b/tests/api_resources/beta/test_clusters.py index ddd4fdf7..f3731d56 100644 --- a/tests/api_resources/beta/test_clusters.py +++ b/tests/api_resources/beta/test_clusters.py @@ -30,7 +30,7 @@ def test_method_create(self, client: Together) -> None: driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", ) assert_matches_type(Cluster, cluster, path=["response"]) @@ -42,7 +42,7 @@ def test_method_create_with_all_params(self, client: Together) -> None: driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - 
region="us-central-8", + region="region", cluster_type="KUBERNETES", duration_days=0, shared_volume={ @@ -62,7 +62,7 @@ def test_raw_response_create(self, client: Together) -> None: driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", ) assert response.is_closed is True @@ -78,7 +78,7 @@ def test_streaming_response_create(self, client: Together) -> None: driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -275,7 +275,7 @@ async def test_method_create(self, async_client: AsyncTogether) -> None: driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", ) assert_matches_type(Cluster, cluster, path=["response"]) @@ -287,7 +287,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether) driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", cluster_type="KUBERNETES", duration_days=0, shared_volume={ @@ -307,7 +307,7 @@ async def test_raw_response_create(self, async_client: AsyncTogether) -> None: driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", ) assert response.is_closed is True @@ -323,7 +323,7 @@ async def test_streaming_response_create(self, async_client: AsyncTogether) -> N driver_version="CUDA_12_5_555", gpu_type="H100_SXM", num_gpus=0, - region="us-central-8", + region="region", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/code_interpreter/test_sessions.py b/tests/api_resources/code_interpreter/test_sessions.py index 6b0281a6..687efd2a 100644 --- a/tests/api_resources/code_interpreter/test_sessions.py +++ 
b/tests/api_resources/code_interpreter/test_sessions.py @@ -17,13 +17,13 @@ class TestSessions: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize def test_method_list(self, client: Together) -> None: session = client.code_interpreter.sessions.list() assert_matches_type(SessionListResponse, session, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize def test_raw_response_list(self, client: Together) -> None: response = client.code_interpreter.sessions.with_raw_response.list() @@ -33,7 +33,7 @@ def test_raw_response_list(self, client: Together) -> None: session = response.parse() assert_matches_type(SessionListResponse, session, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize def test_streaming_response_list(self, client: Together) -> None: with client.code_interpreter.sessions.with_streaming_response.list() as response: @@ -51,13 +51,13 @@ class TestAsyncSessions: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_method_list(self, async_client: AsyncTogether) -> None: session = await async_client.code_interpreter.sessions.list() assert_matches_type(SessionListResponse, session, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_raw_response_list(self, async_client: 
AsyncTogether) -> None: response = await async_client.code_interpreter.sessions.with_raw_response.list() @@ -67,7 +67,7 @@ async def test_raw_response_list(self, async_client: AsyncTogether) -> None: session = await response.parse() assert_matches_type(SessionListResponse, session, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_streaming_response_list(self, async_client: AsyncTogether) -> None: async with async_client.code_interpreter.sessions.with_streaming_response.list() as response: diff --git a/tests/api_resources/test_code_interpreter.py b/tests/api_resources/test_code_interpreter.py index c726e560..2d06e917 100644 --- a/tests/api_resources/test_code_interpreter.py +++ b/tests/api_resources/test_code_interpreter.py @@ -17,7 +17,7 @@ class TestCodeInterpreter: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize def test_method_execute(self, client: Together) -> None: code_interpreter = client.code_interpreter.execute( @@ -26,7 +26,7 @@ def test_method_execute(self, client: Together) -> None: ) assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize def test_method_execute_with_all_params(self, client: Together) -> None: code_interpreter = client.code_interpreter.execute( @@ -43,7 +43,7 @@ def test_method_execute_with_all_params(self, client: Together) -> None: ) assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support 
callbacks yet") @parametrize def test_raw_response_execute(self, client: Together) -> None: response = client.code_interpreter.with_raw_response.execute( @@ -56,7 +56,7 @@ def test_raw_response_execute(self, client: Together) -> None: code_interpreter = response.parse() assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize def test_streaming_response_execute(self, client: Together) -> None: with client.code_interpreter.with_streaming_response.execute( @@ -77,7 +77,7 @@ class TestAsyncCodeInterpreter: "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] ) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_method_execute(self, async_client: AsyncTogether) -> None: code_interpreter = await async_client.code_interpreter.execute( @@ -86,7 +86,7 @@ async def test_method_execute(self, async_client: AsyncTogether) -> None: ) assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_method_execute_with_all_params(self, async_client: AsyncTogether) -> None: code_interpreter = await async_client.code_interpreter.execute( @@ -103,7 +103,7 @@ async def test_method_execute_with_all_params(self, async_client: AsyncTogether) ) assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_raw_response_execute(self, async_client: AsyncTogether) -> None: response = await 
async_client.code_interpreter.with_raw_response.execute( @@ -116,7 +116,7 @@ async def test_raw_response_execute(self, async_client: AsyncTogether) -> None: code_interpreter = await response.parse() assert_matches_type(ExecuteResponse, code_interpreter, path=["response"]) - @pytest.mark.skip(reason="Prism doesn't support callbacks yet") + @pytest.mark.skip(reason="Mock server doesn't support callbacks yet") @parametrize async def test_streaming_response_execute(self, async_client: AsyncTogether) -> None: async with async_client.code_interpreter.with_streaming_response.execute( diff --git a/tests/api_resources/test_endpoints.py b/tests/api_resources/test_endpoints.py index a0467a6b..f040dfab 100644 --- a/tests/api_resources/test_endpoints.py +++ b/tests/api_resources/test_endpoints.py @@ -281,7 +281,7 @@ def test_method_list_hardware(self, client: Together) -> None: @parametrize def test_method_list_hardware_with_all_params(self, client: Together) -> None: endpoint = client.endpoints.list_hardware( - model="model", + model="meta-llama/Llama-3-70b-chat-hf", ) assert_matches_type(EndpointListHardwareResponse, endpoint, path=["response"]) @@ -570,7 +570,7 @@ async def test_method_list_hardware(self, async_client: AsyncTogether) -> None: @parametrize async def test_method_list_hardware_with_all_params(self, async_client: AsyncTogether) -> None: endpoint = await async_client.endpoints.list_hardware( - model="model", + model="meta-llama/Llama-3-70b-chat-hf", ) assert_matches_type(EndpointListHardwareResponse, endpoint, path=["response"]) diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py index c2c81390..4a88b93f 100644 --- a/tests/api_resources/test_evals.py +++ b/tests/api_resources/test_evals.py @@ -155,7 +155,6 @@ def test_method_list_with_all_params(self, client: Together) -> None: eval = client.evals.list( limit=0, status="status", - user_id="userId", ) assert_matches_type(EvalListResponse, eval, path=["response"]) @@ -356,7 +355,6 @@ 
async def test_method_list_with_all_params(self, async_client: AsyncTogether) -> eval = await async_client.evals.list( limit=0, status="status", - user_id="userId", ) assert_matches_type(EvalListResponse, eval, path=["response"]) diff --git a/tests/integration/resources/test_files.py b/tests/integration/resources/test_files.py index 2e713040..96c52dca 100644 --- a/tests/integration/resources/test_files.py +++ b/tests/integration/resources/test_files.py @@ -42,7 +42,6 @@ def test_file_upload( assert isinstance(response, FileResponse) assert response.filename == "valid.jsonl" assert response.file_type == "jsonl" - assert response.line_count == 0 assert response.object == "file" assert response.processed == True assert response.purpose == "fine-tune" diff --git a/tests/unit/test_files_resource.py b/tests/unit/test_files_resource.py index c7d63eec..0e95d641 100644 --- a/tests/unit/test_files_resource.py +++ b/tests/unit/test_files_resource.py @@ -47,7 +47,6 @@ def test_file_upload(mocker: MockerFixture, tmp_path: Path): "created_at": 1234567890, "filename": "valid.jsonl", "FileType": "jsonl", - "LineCount": 0, "purpose": "fine-tune", "object": "file", "Processed": True, @@ -80,7 +79,6 @@ def test_file_upload(mocker: MockerFixture, tmp_path: Path): assert response.bytes == len(content_bytes) assert response.created_at == 1234567890 assert response.file_type == "jsonl" - assert response.line_count == 0 assert response.object == "file" assert response.processed == True assert response.purpose == "fine-tune" diff --git a/tests/unit/test_multipart_upload_manager.py b/tests/unit/test_multipart_upload_manager.py index 9126dd8f..4e8f06c9 100644 --- a/tests/unit/test_multipart_upload_manager.py +++ b/tests/unit/test_multipart_upload_manager.py @@ -41,6 +41,6 @@ def test_file_size_exceeds_limit_raises_error(mock_stat: MagicMock): manager = MultipartUploadManager(MagicMock()) with pytest.raises(FileTypeError) as exc_info: - manager.upload("/files", Path("too_large.jsonl"), 
"fine-tune") + manager.upload("/files", Path("too_large.jsonl"), "checksum", "fine-tune") assert "exceeds maximum supported size" in str(exc_info.value) diff --git a/uv.lock b/uv.lock index e177be45..50042913 100644 --- a/uv.lock +++ b/uv.lock @@ -2040,7 +2040,7 @@ wheels = [ [[package]] name = "together" -version = "2.0.0" +version = "2.1.1" source = { editable = "." } dependencies = [ { name = "anyio" },