Skip to content

Commit 8122a49

Browse files
feat(platform, application): introduce flex start (#292)
* feat(platform, application): introduce flex start
* test(platform): test flex start on staging
1 parent 49941e0 commit 8122a49

File tree

15 files changed

+35384
-42
lines changed

15 files changed

+35384
-42
lines changed

ATTRIBUTIONS.md

Lines changed: 34514 additions & 1 deletion
Large diffs are not rendered by default.

CLI_REFERENCE.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,11 @@ $ aignostics application run execute [OPTIONS] APPLICATION_ID METADATA_CSV_FILE
215215
* `--onboard-to-aignostics-portal / --no-onboard-to-aignostics-portal`: If True, onboard the run to the Aignostics Portal. [default: no-onboard-to-aignostics-portal]
216216
* `--validate-only / --no-validate-only`: If True, cancel the run post validation, before analysis. [default: no-validate-only]
217217
* `--gpu-type TEXT`: GPU type to use for processing (L4 or A100). [default: L4]
218-
* `--gpu-provisioning-mode TEXT`: GPU provisioning mode (SPOT or ON_DEMAND). [default: SPOT]
218+
* `--gpu-provisioning-mode TEXT`: GPU provisioning mode (SPOT, ON_DEMAND, or FLEX_START). [default: SPOT]
219219
* `--max-gpus-per-slide INTEGER RANGE`: Maximum number of GPUs to allocate per slide (1-8). [default: 1; 1<=x<=8]
220+
* `--flex-start-max-run-duration-minutes INTEGER RANGE`: Maximum run duration in minutes when using FLEX_START provisioning mode (1-3600). Ignored when `--gpu-provisioning-mode` is not FLEX_START. [default: 720; 1<=x<=3600]
220221
* `--cpu-provisioning-mode TEXT`: CPU provisioning mode (SPOT or ON_DEMAND). [default: SPOT]
221-
* `--node-acquisition-timeout-minutes INTEGER RANGE`: Timeout for acquiring compute nodes in minutes (1-1440). [default: 30; 1<=x<=1440]
222+
* `--node-acquisition-timeout-minutes INTEGER RANGE`: Timeout for acquiring compute nodes in minutes (1-3600). [default: 30; 1<=x<=3600]
222223
* `--help`: Show this message and exit.
223224

224225
#### `aignostics application run prepare`
@@ -309,10 +310,11 @@ $ aignostics application run submit [OPTIONS] APPLICATION_ID METADATA_CSV_FILE
309310
* `--onboard-to-aignostics-portal / --no-onboard-to-aignostics-portal`: If True, onboard the run to the Aignostics Portal. [default: no-onboard-to-aignostics-portal]
310311
* `--validate-only / --no-validate-only`: If True, cancel the run post validation, before analysis. [default: no-validate-only]
311312
* `--gpu-type TEXT`: GPU type to use for processing (L4 or A100). [default: L4]
312-
* `--gpu-provisioning-mode TEXT`: GPU provisioning mode (SPOT or ON_DEMAND). [default: SPOT]
313+
* `--gpu-provisioning-mode TEXT`: GPU provisioning mode (SPOT, ON_DEMAND, or FLEX_START). [default: SPOT]
313314
* `--max-gpus-per-slide INTEGER RANGE`: Maximum number of GPUs to allocate per slide (1-8). [default: 1; 1<=x<=8]
315+
* `--flex-start-max-run-duration-minutes INTEGER RANGE`: Maximum run duration in minutes when using FLEX_START provisioning mode (1-3600). Ignored when `--gpu-provisioning-mode` is not FLEX_START. [default: 720; 1<=x<=3600]
314316
* `--cpu-provisioning-mode TEXT`: CPU provisioning mode (SPOT or ON_DEMAND). [default: SPOT]
315-
* `--node-acquisition-timeout-minutes INTEGER RANGE`: Timeout for acquiring compute nodes in minutes (1-1440). [default: 30; 1<=x<=1440]
317+
* `--node-acquisition-timeout-minutes INTEGER RANGE`: Timeout for acquiring compute nodes in minutes (1-3600). [default: 30; 1<=x<=3600]
316318
* `--help`: Show this message and exit.
317319

318320
#### `aignostics application run list`
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"$defs": {"PlatformBucketMetadata": {"description": "Platform bucket storage metadata for items.", "properties": {"bucket_name": {"description": "Name of the cloud storage bucket", "title": "Bucket Name", "type": "string"}, "object_key": {"description": "Object key/path within the bucket", "title": "Object Key", "type": "string"}, "signed_download_url": {"description": "Signed URL for downloading the object", "title": "Signed Download Url", "type": "string"}}, "required": ["bucket_name", "object_key", "signed_download_url"], "title": "PlatformBucketMetadata", "type": "object"}}, "additionalProperties": false, "description": "Complete Item SDK metadata schema.\n\nThis model defines the structure and validation rules for SDK metadata\nthat is attached to individual items within application runs. It includes\ninformation about where the item is stored in the platform's cloud storage.", "properties": {"schema_version": {"description": "Schema version for this metadata format", "pattern": "^\\d+\\.\\d+\\.\\d+-?.*$", "title": "Schema Version", "type": "string"}, "created_at": {"description": "ISO 8601 timestamp when the metadata was first created", "title": "Created At", "type": "string"}, "updated_at": {"description": "ISO 8601 timestamp when the metadata was last updated", "title": "Updated At", "type": "string"}, "tags": {"anyOf": [{"items": {"type": "string"}, "type": "array", "uniqueItems": true}, {"type": "null"}], "default": null, "description": "Optional list of tags associated with the item", "title": "Tags"}, "platform_bucket": {"anyOf": [{"$ref": "#/$defs/PlatformBucketMetadata"}, {"type": "null"}], "default": null, "description": "Platform bucket storage information"}}, "required": ["schema_version", "created_at", "updated_at"], "title": "ItemSdkMetadata", "type": "object", "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://raw.githubusercontent.com/aignostics/python-sdk/main/docs/source/_static/item_sdk_metadata_schema_v0.0.3.json"}

docs/source/_static/sdk_run_custom_metadata_schema_latest.json

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
},
5252
"provisioning_mode": {
5353
"$ref": "#/$defs/ProvisioningMode",
54-
"description": "The provisioning mode for GPU resources (SPOT or ON_DEMAND)"
54+
"description": "The provisioning mode for GPU resources (SPOT, ON_DEMAND, or FLEX_START)"
5555
},
5656
"max_gpus_per_slide": {
5757
"default": 1,
@@ -60,6 +60,21 @@
6060
"minimum": 1,
6161
"title": "Max Gpus Per Slide",
6262
"type": "integer"
63+
},
64+
"flex_start_max_run_duration_minutes": {
65+
"anyOf": [
66+
{
67+
"maximum": 3600,
68+
"minimum": 1,
69+
"type": "integer"
70+
},
71+
{
72+
"type": "null"
73+
}
74+
],
75+
"default": null,
76+
"description": "Maximum run duration in minutes when using FLEX_START provisioning mode (1-3600). Required when provisioning_mode is FLEX_START, must be null otherwise.",
77+
"title": "Flex Start Max Run Duration Minutes"
6378
}
6479
},
6580
"title": "GPUConfig",
@@ -270,8 +285,8 @@
270285
},
271286
"node_acquisition_timeout_minutes": {
272287
"default": 30,
273-
"description": "Timeout for acquiring compute nodes in minutes (1-1440)",
274-
"maximum": 1440,
288+
"description": "Timeout for acquiring compute nodes in minutes (1-3600)",
289+
"maximum": 3600,
275290
"minimum": 1,
276291
"title": "Node Acquisition Timeout Minutes",
277292
"type": "integer"
@@ -284,7 +299,8 @@
284299
"description": "Provisioning mode for resources.",
285300
"enum": [
286301
"SPOT",
287-
"ON_DEMAND"
302+
"ON_DEMAND",
303+
"FLEX_START"
288304
],
289305
"title": "ProvisioningMode",
290306
"type": "string"
@@ -572,5 +588,5 @@
572588
"title": "RunSdkMetadata",
573589
"type": "object",
574590
"$schema": "https://json-schema.org/draft/2020-12/schema",
575-
"$id": "https://raw.githubusercontent.com/aignostics/python-sdk/main/docs/source/_static/sdk_metadata_schema_v0.0.5.json"
591+
"$id": "https://raw.githubusercontent.com/aignostics/python-sdk/main/docs/source/_static/sdk_metadata_schema_v0.0.6.json"
576592
}

0 commit comments

Comments
 (0)