diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index a893f710..0ace6595 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -350,7 +350,23 @@ jobs: - name: Run Python tests run: | - uv run pytest tests/python/ -v --tb=short + uv run python - <<'PY' + import contextlib + import os + import sys + + import pytest + + exit_code = pytest.main(["tests/python/", "-v", "--tb=short"]) + if "tensorcast._C" in sys.modules: + with contextlib.suppress(Exception): + from tensorcast._c_ext import get_c_ext + + get_c_ext().shutdown_native_runtime() + sys.stdout.flush() + sys.stderr.flush() + os._exit(int(exit_code)) + PY env: LD_LIBRARY_PATH: ${{ github.workspace }}/tensorcast/lib:${{ env.LD_LIBRARY_PATH }} TENSORCAST_CUDA_BACKEND: fake diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 404c3d64..6b84e31a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,7 @@ default_install_hook_types: - pre-commit - commit-msg + - pre-push exclude: ^.github/actions/assigner/dist repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -90,6 +91,20 @@ repos: - --use-current-year - repo: local hooks: + - id: pyright + name: pyright (tensorcast) + language: system + entry: env UV_NO_SYNC=1 uv run pyright ./tensorcast + pass_filenames: false + files: ^(tensorcast/.*\.py|pyproject\.toml)$ + stages: [pre-push] + - id: mypy + name: mypy (tensorcast) + language: system + entry: env UV_NO_SYNC=1 uv run mypy ./tensorcast + pass_filenames: false + files: ^(tensorcast/.*\.py|pyproject\.toml)$ + stages: [pre-push] - id: webui-prettier name: webui-prettier-check language: system diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dc751391..dae468a5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -34,6 +34,14 @@ ruff check . ruff format . 
--check ``` +The pre-push hook also runs the CI-matching package type checks through the +project `uv` environment: + +```bash +uv run pyright ./tensorcast +uv run mypy ./tensorcast +``` + If you modify protocol buffers, regenerate Python stubs and C++ headers: ```bash diff --git a/daemon/BUILD b/daemon/BUILD index 9c8222b6..cb56bdc8 100644 --- a/daemon/BUILD +++ b/daemon/BUILD @@ -1683,6 +1683,7 @@ sc_cc_library( "//proto/tensorcast/global_store/v1:global_store_grpc_cc", "@abseil-cpp//absl/strings", "@abseil-cpp//absl/types:span", + "@protobuf", ], ) @@ -1965,7 +1966,9 @@ cc_test( name = "pid_monitor_unwatch_integration_test", srcs = ["state/pid_monitor_unwatch_integration_test.cc"], deps = [ + ":handle_lease_registry_lib", ":ipc_region_registry_lib", + ":lifecycle_kernel_lib", ":lip_manager_lib", ":pid_monitor_lib", ":ref_tracker_hdr", @@ -1973,6 +1976,7 @@ cc_test( ":session_lifecycle_lib", ":session_manager_hdr", "//core/store:device_registry", + "//core/store:store_engine", "@catch2//:catch2_main", ], ) diff --git a/daemon/service/controllers/materialization_controller.cc b/daemon/service/controllers/materialization_controller.cc index 484f5953..8bef4c58 100644 --- a/daemon/service/controllers/materialization_controller.cc +++ b/daemon/service/controllers/materialization_controller.cc @@ -625,6 +625,7 @@ grpc::Status MaterializationController::prefetch_serving_binding( status->set_message(failed ? 
"serving binding set materialization failed" : "serving binding set is local-ready"); status->set_progress(1.0); status->mutable_result()->PackFrom(set_result); + attach_controller_realization_plan_span_attrs(rctx, realization_plan); rctx.mark_success(); return grpc::Status::OK; } diff --git a/daemon/service/controllers/materialization_policy_utils.cc b/daemon/service/controllers/materialization_policy_utils.cc index 79786675..c6b187c6 100644 --- a/daemon/service/controllers/materialization_policy_utils.cc +++ b/daemon/service/controllers/materialization_policy_utils.cc @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,9 @@ #include "core/common/artifact_hash.h" #include "core/store/materialization/dataplane/view/view_identity.h" #include "daemon/service/rpc_context.h" +#include "google/protobuf/io/coded_stream.h" +#include "google/protobuf/io/zero_copy_stream_impl_lite.h" +#include "google/protobuf/message_lite.h" namespace tensorcast::daemon::materialization_policy { @@ -30,6 +34,7 @@ using store::loader::ViewOp; constexpr std::string_view kGroupRealizationTransportKind = "group_realization_transport"; constexpr std::string_view kGroupRealizationChildTransportRequestProfile = "tensorcast.group_realization.child_transport_request.v1"; +constexpr std::string_view kControllerSourceSelectionDigestProfile = "tensorcast.controller.source_selection_digest.v1"; store::loading::SourceLocalityHint to_source_locality(v2::SourceLocality locality) { switch (locality) { @@ -375,20 +380,124 @@ std::vector acquire_group_barriers_for(const v2::GroupRealizationAc return barriers; } +std::string serialize_deterministic(const google::protobuf::MessageLite& message) { + std::string output; + { + google::protobuf::io::StringOutputStream string_stream(&output); + google::protobuf::io::CodedOutputStream coded_stream(&string_stream); + coded_stream.SetSerializationDeterministic(true); + if (!message.SerializeToCodedStream(&coded_stream) || 
coded_stream.HadError()) { + return message.SerializeAsString(); + } + } + return output; +} + +void append_big_endian_u64(std::vector* out, uint64_t value) { + for (int shift = 56; shift >= 0; shift -= 8) { + out->push_back(static_cast((value >> shift) & 0xffU)); + } +} + +void append_digest_part(std::vector* out, std::string_view part) { + append_big_endian_u64(out, static_cast(part.size())); + out->insert(out->end(), part.begin(), part.end()); +} + +std::string sha256_hex_for_parts(const std::vector& parts) { + uint64_t total_size = 0; + for (std::string_view part : parts) { + total_size += 8U + static_cast(part.size()); + } + std::vector payload; + if (total_size <= static_cast(std::numeric_limits::max())) { + payload.reserve(static_cast(total_size)); + } + for (std::string_view part : parts) { + append_digest_part(&payload, part); + } + const std::vector digest = common::sha256_digest_bytes(absl::MakeConstSpan(payload)); + return absl::BytesToHexString(std::string(reinterpret_cast(digest.data()), digest.size())); +} + +std::string artifact_profile_for(std::string_view artifact_id) { + if (artifact_id.starts_with("msa1:")) { + return "mounted_source"; + } + if (artifact_id.starts_with("cgid:")) { + return "byte_artifact"; + } + return "durable_artifact"; +} + +std::string authority_scope_for(std::string_view artifact_id) { + if (artifact_id.starts_with("msa1:")) { + return "daemon_local_mounted_source"; + } + return "daemon_mediated_durable"; +} + +std::optional requested_generation_hint_for(const v2::GroupRealizationOptions* group_realization) { + if (group_realization == nullptr || !group_realization->enabled()) { + return std::nullopt; + } + if (group_realization->version().value_case() != v2::VersionReference::kKeyReference) { + return std::nullopt; + } + const v2::KeyVersionReference& key_ref = group_realization->version().key_reference(); + if (!key_ref.has_expected_generation()) { + return std::nullopt; + } + return 
std::to_string(key_ref.expected_generation()); +} + +std::optional requested_version_set_id_for(const v2::GroupRealizationOptions* group_realization) { + if (group_realization == nullptr || !group_realization->enabled()) { + return std::nullopt; + } + if (group_realization->version().value_case() != v2::VersionReference::kExplicitVersionSet) { + return std::nullopt; + } + const std::string& version_set_id = group_realization->version().explicit_version_set().version_set_id(); + if (version_set_id.empty()) { + return std::nullopt; + } + return version_set_id; +} + std::optional selection_digest_for( const v2::GroupRealizationOptions* group_realization, const GroupRealizationBeginContext* begin_context, const tensorcast::common::v1::ArtifactSelection& selection) { - if (begin_context != nullptr && !begin_context->selection_hash.empty()) { - return absl::BytesToHexString(begin_context->selection_hash); - } - if (!selection.selection_hash().empty()) { - return absl::BytesToHexString(selection.selection_hash()); - } - if (group_realization != nullptr && group_realization->enabled()) { + const tensorcast::common::v1::ArtifactSelection& effective_selection = + begin_context != nullptr && !begin_context->part_selection.artifact_id().empty() ? begin_context->part_selection + : selection; + if (effective_selection.artifact_id().empty()) { return std::nullopt; } - return std::nullopt; + + const std::string serialized_selection = serialize_deterministic(effective_selection); + const std::string selection_identity = begin_context != nullptr && !begin_context->selection_hash.empty() + ? 
begin_context->selection_hash + : effective_selection.selection_hash(); + std::string generation_hint; + if (begin_context != nullptr && begin_context->key_generation != 0) { + generation_hint = std::to_string(begin_context->key_generation); + } else if (std::optional requested_generation = requested_generation_hint_for(group_realization); + requested_generation.has_value()) { + generation_hint = *requested_generation; + } + const std::string profile = artifact_profile_for(effective_selection.artifact_id()); + const std::string scope = authority_scope_for(effective_selection.artifact_id()); + return sha256_hex_for_parts({ + kControllerSourceSelectionDigestProfile, + serialized_selection, + effective_selection.logical_layout_hash(), + selection_identity, + profile, + scope, + generation_hint, + }); } std::optional operation_id_for(const v2::MaterializeIntoTargetRequest& request) { @@ -896,7 +1005,7 @@ absl::StatusOr build_controller_realization_plan_impl .group_barriers = group_barriers_for(group_realization), .version_set_id = group_begin_context != nullptr && !group_begin_context->version_set.version_set_id().empty() ? std::optional(group_begin_context->version_set.version_set_id()) - : std::nullopt, + : requested_version_set_id_for(group_realization), .transaction_id = group_begin_context != nullptr && !group_begin_context->transaction_id.empty() ? std::optional(group_begin_context->transaction_id) : std::nullopt, @@ -956,11 +1065,15 @@ absl::StatusOr build_prefetch_target_set_realization_ : "same_daemon_session", .collective_policy = prefetch_collective_policy_for(request, member_count), .group_barriers = group_barriers_for(request.has_group_realization() ? &request.group_realization() : nullptr), - .version_set_id = std::nullopt, + .version_set_id = + requested_version_set_id_for(request.has_group_realization() ? 
&request.group_realization() : nullptr), .transaction_id = std::nullopt, .source_selection_digest = !request.source().artifact_selection_digest().empty() ? std::optional(request.source().artifact_selection_digest()) - : selection_digest_for(nullptr, nullptr, request.source_selection()), + : selection_digest_for( + request.has_group_realization() ? &request.group_realization() : nullptr, + nullptr, + request.source_selection()), }; plan.lifecycle = ControllerRealizationLifecyclePlan{ .capability = "target_set", @@ -1020,11 +1133,15 @@ absl::StatusOr build_prefetch_member_realization_plan : "same_daemon_session", .collective_policy = prefetch_collective_policy_for(request, member_count), .group_barriers = group_barriers_for(request.has_group_realization() ? &request.group_realization() : nullptr), - .version_set_id = std::nullopt, + .version_set_id = + requested_version_set_id_for(request.has_group_realization() ? &request.group_realization() : nullptr), .transaction_id = std::nullopt, .source_selection_digest = !request.source().artifact_selection_digest().empty() ? std::optional(request.source().artifact_selection_digest()) - : selection_digest_for(nullptr, nullptr, request.source_selection()), + : selection_digest_for( + request.has_group_realization() ? &request.group_realization() : nullptr, + nullptr, + request.source_selection()), }; plan.lifecycle = ControllerRealizationLifecyclePlan{ .capability = "retained_binding", @@ -1259,7 +1376,7 @@ absl::StatusOr resolve_collective_policy( const ExecutionTopologyContext& execution_topology) { const bool has_collective_group = execution_topology.collective_load_group.has_value(); if (requested == v2::CollectivePolicy::COLLECTIVE_POLICY_UNSPECIFIED) { - return has_collective_group ? v2::CollectivePolicy::COLLECTIVE_POLICY_REQUIRE_COLLECTIVE + return has_collective_group ? 
v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST : v2::CollectivePolicy::COLLECTIVE_POLICY_DISABLE_COLLECTIVE; } if (requested == v2::CollectivePolicy::COLLECTIVE_POLICY_DISABLE_COLLECTIVE && has_collective_group) { @@ -1367,13 +1484,11 @@ absl::StatusOr build_controller_realization_plan( .group_barriers = group_barriers_for(group_realization), .version_set_id = group_begin_context != nullptr && !group_begin_context->version_set.version_set_id().empty() ? std::optional(group_begin_context->version_set.version_set_id()) - : std::nullopt, + : requested_version_set_id_for(group_realization), .transaction_id = group_begin_context != nullptr && !group_begin_context->transaction_id.empty() ? std::optional(group_begin_context->transaction_id) : std::nullopt, - .source_selection_digest = !resolved_selection.selection_hash().empty() - ? std::optional(absl::BytesToHexString(resolved_selection.selection_hash())) - : std::nullopt, + .source_selection_digest = selection_digest_for(group_realization, group_begin_context, resolved_selection), }; plan.lifecycle = ControllerRealizationLifecyclePlan{ .capability = target_kind, @@ -1439,9 +1554,8 @@ absl::StatusOr build_controller_realization_plan(cons .group_barriers = {}, .version_set_id = std::nullopt, .transaction_id = std::nullopt, - .source_selection_digest = - request.has_initial_selection() && !request.initial_selection().selection_hash().empty() - ? std::optional(absl::BytesToHexString(request.initial_selection().selection_hash())) + .source_selection_digest = request.has_initial_selection() + ? 
selection_digest_for(nullptr, nullptr, request.initial_selection()) : std::nullopt, }; const bool daemon_owned = request.ownership() == v2::BindingOwnership::BINDING_OWNERSHIP_DAEMON; @@ -1967,9 +2081,7 @@ absl::StatusOr build_controller_realization_plan( .group_barriers = group_barriers_for(group_realization), .version_set_id = std::nullopt, .transaction_id = std::nullopt, - .source_selection_digest = !scope.selection().selection_hash().empty() - ? std::optional(absl::BytesToHexString(scope.selection().selection_hash())) - : std::nullopt, + .source_selection_digest = selection_digest_for(group_realization, nullptr, scope.selection()), }; plan.lifecycle = ControllerRealizationLifecyclePlan{ .capability = "publication", diff --git a/daemon/service/controllers/owned_binding_service.cc b/daemon/service/controllers/owned_binding_service.cc index 6ff39041..bb68ca74 100644 --- a/daemon/service/controllers/owned_binding_service.cc +++ b/daemon/service/controllers/owned_binding_service.cc @@ -207,6 +207,7 @@ std::string serialize_proto_for_cache_key(const google::protobuf::MessageLite& p std::string compute_target_layout_geometry_hash(const v2::TargetLayout& layout) { std::string payload; + absl::flat_hash_map storage_ordinals; absl::StrAppend( &payload, "layout_kind=", @@ -218,15 +219,26 @@ std::string compute_target_layout_geometry_hash(const v2::TargetLayout& layout) "|view_id="); append_cache_field(&payload, layout.view_id()); append_cache_field(&payload, layout.logical_layout_hash()); + uint64_t storage_ordinal = 0; for (const auto& storage : layout.storages()) { - append_cache_field(&payload, storage.storage_id()); + if (!storage.storage_id().empty()) { + storage_ordinals.emplace(storage.storage_id(), storage_ordinal); + } + append_cache_uint64(&payload, storage_ordinal); append_cache_uint64(&payload, static_cast(storage.device_id())); append_cache_uint64(&payload, storage.storage_length()); append_cache_uint64(&payload, storage.mapping_base_offset()); + 
storage_ordinal++; } for (const auto& entry : layout.offsets()) { append_cache_field(&payload, entry.name()); - append_cache_field(&payload, entry.storage_id()); + auto storage_it = storage_ordinals.find(entry.storage_id()); + if (storage_it == storage_ordinals.end()) { + append_cache_field(&payload, "unknown-storage"); + append_cache_field(&payload, entry.storage_id()); + } else { + append_cache_uint64(&payload, storage_it->second); + } append_cache_uint64(&payload, entry.storage_offset()); append_cache_uint64(&payload, entry.logical_length()); } @@ -242,7 +254,7 @@ std::string binding_realization_plan_cache_key( std::string_view canonical_index_json, v2::TransformPlacement placement) { std::string payload; - append_cache_field(&payload, "binding-realization-plan-v1"); + append_cache_field(&payload, "binding-realization-plan-v2"); append_cache_field(&payload, resolved_artifact_id); append_cache_field(&payload, serialize_proto_for_cache_key(selection)); append_cache_field(&payload, serialize_proto_for_cache_key(realization_plan)); @@ -306,7 +318,7 @@ std::string mapped_execution_template_cache_key( const store::loading::ExecutionTopologyContext& topology, bool disk_source_available) { std::string payload; - append_cache_field(&payload, "mapped-execution-template-v1"); + append_cache_field(&payload, "mapped-execution-template-v2"); append_cache_field(&payload, plan_key); append_cache_field( &payload, diff --git a/daemon/service/materialization_policy_utils_test.cc b/daemon/service/materialization_policy_utils_test.cc index c630b810..f5f406e0 100644 --- a/daemon/service/materialization_policy_utils_test.cc +++ b/daemon/service/materialization_policy_utils_test.cc @@ -3,6 +3,7 @@ #include "daemon/service/controllers/materialization_policy_utils.h" #include +#include #include #include #include @@ -24,6 +25,7 @@ using tensorcast::daemon::materialization_policy::GroupRealizationPreparedMember using 
tensorcast::daemon::materialization_policy::report_group_realization_prepared_if_enabled; using tensorcast::daemon::materialization_policy::require_controller_export_kind; using tensorcast::daemon::materialization_policy::require_controller_resource_authority; +using tensorcast::daemon::materialization_policy::resolve_collective_policy; using tensorcast::daemon::materialization_policy::resolve_group_realization_transport_context; using tensorcast::daemon::materialization_policy::resolve_materialization_request_context; using tensorcast::daemon::materialization_policy::resolve_operation_transport_context; @@ -84,6 +86,18 @@ bool has_resource_authority(const ControllerRealizationPlan& plan, std::string_v [authority](const std::string& current) { return std::string_view(current) == authority; }); } +bool is_sha256_hex(std::string_view value) { + return value.size() == 64 && std::all_of(value.begin(), value.end(), [](char ch) { + return std::isxdigit(static_cast(ch)) != 0; + }); +} + +void check_controller_source_selection_digest(const std::optional& digest) { + REQUIRE(digest.has_value()); + CHECK(is_sha256_hex(*digest)); + CHECK(*digest != "73656c656374696f6e2d68617368"); +} + v2::GroupRealizationOptions build_group_realization_options() { v2::GroupRealizationOptions options; options.set_enabled(true); @@ -180,7 +194,7 @@ TEST_CASE("Controller realization plan mirrors caller target materialization", " CHECK(plan_or->target.member_count == 1); CHECK(plan_or->strategy.source_selection_mode == "single_selection"); CHECK(plan_or->strategy.source_coordination == "single_request"); - CHECK(plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368"); + check_controller_source_selection_digest(plan_or->strategy.source_selection_digest); CHECK(plan_or->lifecycle.capability == "caller_tensors"); CHECK((plan_or->lifecycle.release_policy == std::vector{"release_external_target_storage_lease"})); CHECK(plan_or->resource_envelope.projection_kind == "completion"); 
@@ -191,6 +205,44 @@ TEST_CASE("Controller realization plan mirrors caller target materialization", " REQUIRE(require_controller_resource_authority(*plan_or, "caller_allocation", "MaterializeIntoTarget").ok()); } +TEST_CASE( + "Controller source selection digest is independent from target layout digest", + "[daemon][materialization][policy]") { + v2::MaterializeIntoTargetRequest base_request; + base_request.mutable_selection()->set_artifact_id("mi2:test:artifact"); + base_request.mutable_selection()->set_logical_layout_hash("logical-layout"); + base_request.mutable_selection()->set_selection_hash("selection-hash"); + base_request.set_pid(1234); + base_request.set_device_uuid("GPU-0"); + fill_target_layout(base_request.mutable_target_layout()); + auto request_context_or = resolve_materialization_request_context(nullptr); + REQUIRE(request_context_or.ok()); + + const auto first_transport = resolve_operation_transport_context("op-identity-1"); + auto first_or = build_controller_realization_plan( + base_request, *request_context_or, first_transport, nullptr, "mi2:test:artifact"); + REQUIRE(first_or.ok()); + + v2::MaterializeIntoTargetRequest target_changed_request = base_request; + target_changed_request.mutable_target_layout()->mutable_storages(0)->set_storage_length(2048); + const auto target_changed_transport = resolve_operation_transport_context("op-identity-2"); + auto target_changed_or = build_controller_realization_plan( + target_changed_request, *request_context_or, target_changed_transport, nullptr, "mi2:test:artifact"); + REQUIRE(target_changed_or.ok()); + + v2::MaterializeIntoTargetRequest source_changed_request = base_request; + source_changed_request.mutable_selection()->set_logical_layout_hash("other-logical-layout"); + const auto source_changed_transport = resolve_operation_transport_context("op-identity-3"); + auto source_changed_or = build_controller_realization_plan( + source_changed_request, *request_context_or, source_changed_transport, nullptr, 
"mi2:test:artifact"); + REQUIRE(source_changed_or.ok()); + + check_controller_source_selection_digest(first_or->strategy.source_selection_digest); + CHECK(first_or->strategy.source_selection_digest == target_changed_or->strategy.source_selection_digest); + CHECK(first_or->target.target_layout_digest != target_changed_or->target.target_layout_digest); + CHECK(first_or->strategy.source_selection_digest != source_changed_or->strategy.source_selection_digest); +} + TEST_CASE("Controller realization plan mirrors binding creation ownership", "[daemon][materialization][policy]") { v2::CreateBindingRequest daemon_request; daemon_request.set_ownership(v2::BindingOwnership::BINDING_OWNERSHIP_DAEMON); @@ -243,8 +295,7 @@ TEST_CASE("Controller realization plan mirrors binding creation ownership", "[da CHECK(adopted_plan_or->target.resolved_artifact_id == "mi2:source-override"); CHECK(adopted_plan_or->strategy.source_selection_mode == "single_selection"); CHECK(adopted_plan_or->strategy.source_coordination == "binding_initial_value"); - REQUIRE(adopted_plan_or->strategy.source_selection_digest.has_value()); - CHECK(*adopted_plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368"); + check_controller_source_selection_digest(adopted_plan_or->strategy.source_selection_digest); CHECK(adopted_plan_or->lifecycle.export_lifetime_kind == "binding_registry"); CHECK(adopted_plan_or->lifecycle.mutability_contract == "caller_region_borrowed"); CHECK(adopted_plan_or->resource_envelope.backing_kind == "caller_region"); @@ -450,8 +501,7 @@ TEST_CASE("Controller realization plan mirrors owned binding refill", "[daemon][ CHECK(*plan_or->target.operation_id == "refill-op"); CHECK(plan_or->strategy.source_selection_mode == "single_selection"); CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_REQUIRE_COLLECTIVE); - REQUIRE(plan_or->strategy.source_selection_digest.has_value()); - CHECK(*plan_or->strategy.source_selection_digest == 
"73656c656374696f6e2d68617368"); + check_controller_source_selection_digest(plan_or->strategy.source_selection_digest); CHECK(plan_or->lifecycle.capability == "binding_owned"); CHECK(plan_or->lifecycle.export_lifetime_kind == "binding_current_value"); CHECK(plan_or->lifecycle.mutability_contract == "binding_controlled_read_only"); @@ -688,8 +738,7 @@ TEST_CASE( CHECK(plan_or->strategy.source_selection_mode == "single_selection"); CHECK(plan_or->strategy.source_coordination == "single_request"); CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST); - REQUIRE(plan_or->strategy.source_selection_digest.has_value()); - CHECK(*plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368"); + check_controller_source_selection_digest(plan_or->strategy.source_selection_digest); CHECK(plan_or->lifecycle.capability == "tensor_dict"); CHECK(plan_or->lifecycle.export_lifetime_kind == "handle_lease"); CHECK( @@ -827,6 +876,8 @@ TEST_CASE( fill_serving_target(target_set->add_members(), "member-1", 1, "GPU-1"); target_set->mutable_source()->CopyFrom(request.source()); request.mutable_group_realization()->CopyFrom(build_group_realization_options()); + request.mutable_group_realization()->mutable_version()->mutable_explicit_version_set()->set_version_set_id( + "vs-requested"); request.mutable_group_realization()->set_require_staged_publish(true); auto plan_or = build_controller_realization_plan(request); @@ -840,6 +891,10 @@ TEST_CASE( CHECK(plan_or->strategy.source_selection_mode == "same_selection"); CHECK(plan_or->strategy.source_coordination == "group_realization_transport"); CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST); + REQUIRE(plan_or->strategy.version_set_id.has_value()); + CHECK(*plan_or->strategy.version_set_id == "vs-requested"); + REQUIRE(plan_or->strategy.source_selection_digest.has_value()); + CHECK(*plan_or->strategy.source_selection_digest == 
"source-selection"); CHECK( (plan_or->strategy.group_barriers == std::vector{"member_readiness", "group_acquire", "staged_values", "publish_barrier"})); @@ -977,8 +1032,7 @@ TEST_CASE("Controller realization plan mirrors target publication lifecycle", "[ CHECK(plan_or->strategy.source_selection_mode == "single_selection"); CHECK(plan_or->strategy.source_coordination == "publication_lifecycle"); CHECK(plan_or->strategy.collective_policy == v2::CollectivePolicy::COLLECTIVE_POLICY_DISABLE_COLLECTIVE); - REQUIRE(plan_or->strategy.source_selection_digest.has_value()); - CHECK(*plan_or->strategy.source_selection_digest == "73656c656374696f6e2d68617368"); + check_controller_source_selection_digest(plan_or->strategy.source_selection_digest); CHECK(plan_or->lifecycle.capability == "publication"); CHECK(plan_or->lifecycle.export_lifetime_kind == "publication_lease"); CHECK(plan_or->lifecycle.mutability_contract == "published_read_only"); @@ -1157,6 +1211,19 @@ TEST_CASE( v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST); } +TEST_CASE( + "Unspecified collective policy defaults to collective-first when topology is present", + "[daemon][materialization][policy]") { + ExecutionTopologyContext execution_topology; + execution_topology.collective_load_group = + CollectiveLoadGroupHint{.group_id = "same-host-tp-load", .world_size = 8, .rank = 3}; + + auto policy_or = resolve_collective_policy(v2::CollectivePolicy::COLLECTIVE_POLICY_UNSPECIFIED, execution_topology); + + REQUIRE(policy_or.ok()); + CHECK(*policy_or == v2::CollectivePolicy::COLLECTIVE_POLICY_COLLECTIVE_FIRST); +} + TEST_CASE( "Mapped target defaults to disable-collective without collective topology", "[daemon][materialization][policy]") { diff --git a/daemon/state/handle_lease_registry.cc b/daemon/state/handle_lease_registry.cc index dcf7623f..88115a48 100644 --- a/daemon/state/handle_lease_registry.cc +++ b/daemon/state/handle_lease_registry.cc @@ -858,6 +858,9 @@ absl::StatusOr 
HandleLeaseRegistry::mint_external_cuda_lease(pid_t if (!cleanup) { return absl::InvalidArgumentError("cleanup is required"); } + if (lifecycle_ == nullptr) { + return absl::FailedPreconditionError("lifecycle manager is unavailable"); + } std::string token; { @@ -880,11 +883,13 @@ absl::StatusOr HandleLeaseRegistry::mint_external_cuda_lease(pid_t .external_cleanup = std::move(cleanup), }; } + lifecycle_->watch_pid(pid); return token; } absl::Status HandleLeaseRegistry::release(const std::string& lease_token) { SessionLifecycleManager::LeaseId id = 0; + pid_t external_owner_pid = 0; std::function external_cleanup; { absl::MutexLock lock(&mu_); @@ -893,6 +898,7 @@ absl::Status HandleLeaseRegistry::release(const std::string& lease_token) { return absl::NotFoundError("lease_token not found"); } if (it->second.kind == HandleKind::kExternal) { + external_owner_pid = it->second.external_owner_pid; external_cleanup = std::move(it->second.external_cleanup); leases_.erase(it); } else { @@ -901,6 +907,9 @@ absl::Status HandleLeaseRegistry::release(const std::string& lease_token) { } if (external_cleanup) { external_cleanup(); + if (lifecycle_ != nullptr && external_owner_pid > 0) { + lifecycle_->unwatch_pid(external_owner_pid); + } return absl::OkStatus(); } lifecycle_->release_lease(id); diff --git a/daemon/state/pid_monitor_unwatch_integration_test.cc b/daemon/state/pid_monitor_unwatch_integration_test.cc index 51c95c62..cd66cca5 100644 --- a/daemon/state/pid_monitor_unwatch_integration_test.cc +++ b/daemon/state/pid_monitor_unwatch_integration_test.cc @@ -2,8 +2,16 @@ #include +#include +#include + +#include "absl/status/status.h" #include "core/store/device_registry.h" +#include "core/store/store_engine.h" +#include "core/store/store_engine_options.h" +#include "daemon/state/handle_lease_registry.h" #include "daemon/state/ipc_region_registry.h" +#include "daemon/state/lifecycle_kernel.h" #include "daemon/state/lip_manager.h" #include "daemon/state/pid_monitor.h" #include 
"daemon/state/ref_tracker.h" @@ -17,6 +25,30 @@ using tensorcast::daemon::SessionLifecycleManager; using tensorcast::store::DeviceRegistry; using tensorcast::store::loading::ReplicaKey; +namespace { + +std::filesystem::path test_tmpdir() { + const char* env = std::getenv("TEST_TMPDIR"); + if (env != nullptr && *env != '\0') { + return std::filesystem::path(env); + } + return std::filesystem::temp_directory_path() / "tensorcast_pid_monitor_unwatch_integration_test"; +} + +tensorcast::store::StoreEngineOptions make_engine_opts() { + tensorcast::store::StoreEngineOptions opts; + opts.storage_path = (test_tmpdir() / "engine").string(); + std::filesystem::create_directories(opts.storage_path); + opts.p2p_port = 0; + opts.memory_pool_size = 32ULL << 20; + opts.tx_slice_bytes = 1ULL << 20; + opts.num_thread = 2; + opts.global_store_address.clear(); + return opts; +} + +} // namespace + TEST_CASE("PidMonitor unwatch called on last guard retire", "[daemon][lifecycle][pid]") { ReplicaSessionManager sessions(std::chrono::seconds(60)); RefTracker refs; @@ -71,3 +103,55 @@ TEST_CASE("PidMonitor unwatch is suppressed by external watches", "[daemon][life mgr.unwatch_pid(pid); REQUIRE_FALSE(mon.is_watching_for_test(pid)); } + +TEST_CASE("external CUDA handle lease watches owner pid until release", "[daemon][lifecycle][pid][handle]") { + ReplicaSessionManager sessions(std::chrono::seconds(60)); + RefTracker refs; + tensorcast::daemon::IpcRegionRegistry regions(tensorcast::daemon::IpcRegionRegistry::Options{}); + auto lip = + std::make_unique(std::shared_ptr(), ®ions); + SessionLifecycleManager mgr(sessions, refs, *lip); + + PidMonitor mon([&](pid_t) {}); + mgr.attach_pid_monitor(&mon); + auto engine = std::make_shared(make_engine_opts()); + tensorcast::daemon::LifecycleKernel lifecycle_kernel("daemon-test"); + tensorcast::daemon::HandleLeaseRegistry leases( + tensorcast::daemon::HandleLeaseRegistry::Options{}, *engine, mgr, lifecycle_kernel); + + const int32_t pid = 884422; + int 
cleanup_calls = 0; + auto token_or = leases.mint_external_cuda_lease(pid, [&]() { ++cleanup_calls; }); + REQUIRE(token_or.ok()); + REQUIRE(mon.is_watching_for_test(pid)); + + REQUIRE(leases.release(*token_or).ok()); + + CHECK(cleanup_calls == 1); + REQUIRE_FALSE(mon.is_watching_for_test(pid)); +} + +TEST_CASE("external CUDA handle lease cleanup runs on owner pid exit", "[daemon][lifecycle][pid][handle]") { + ReplicaSessionManager sessions(std::chrono::seconds(60)); + RefTracker refs; + tensorcast::daemon::IpcRegionRegistry regions(tensorcast::daemon::IpcRegionRegistry::Options{}); + auto lip = + std::make_unique(std::shared_ptr(), ®ions); + SessionLifecycleManager mgr(sessions, refs, *lip); + + auto engine = std::make_shared(make_engine_opts()); + tensorcast::daemon::LifecycleKernel lifecycle_kernel("daemon-test"); + tensorcast::daemon::HandleLeaseRegistry leases( + tensorcast::daemon::HandleLeaseRegistry::Options{}, *engine, mgr, lifecycle_kernel); + + const int32_t pid = 884423; + int cleanup_calls = 0; + auto token_or = leases.mint_external_cuda_lease(pid, [&]() { ++cleanup_calls; }); + REQUIRE(token_or.ok()); + + mgr.handle_pid_exit(pid); + leases.handle_pid_exit(pid); + + CHECK(cleanup_calls == 1); + CHECK(absl::IsNotFound(leases.release(*token_or))); +} diff --git a/docs/designs/0111-source-to-serving-builder-and-representation-publication.md b/docs/designs/0111-source-to-serving-builder-and-representation-publication.md index d45e6a3d..9c8da26c 100644 --- a/docs/designs/0111-source-to-serving-builder-and-representation-publication.md +++ b/docs/designs/0111-source-to-serving-builder-and-representation-publication.md @@ -15,7 +15,7 @@ related_code: - docs/internals/model-loading.md - tensorcast/types.py - tensorcast/api/store/__init__.py - - tensorcast/api/store/serving_builder.py + - tensorcast/api/store/publication_builder.py - tensorcast/api/store/binding.py - tensorcast/api/store/owned_binding_slot.py - proto/tensorcast/daemon/v2/store_daemon.proto diff 
--git a/docs/designs/0112-binding-native-serving-realization-and-publication.md b/docs/designs/0112-binding-native-serving-realization-and-publication.md index 724c0ea3..dfa94c4e 100644 --- a/docs/designs/0112-binding-native-serving-realization-and-publication.md +++ b/docs/designs/0112-binding-native-serving-realization-and-publication.md @@ -18,7 +18,7 @@ related_code: - tensorcast/api/store/binding.py - tensorcast/api/store/owned_binding_slot.py - tensorcast/api/store/realization_plan.py - - tensorcast/api/store/serving_builder.py + - tensorcast/api/store/publication_builder.py - tensorcast/api/store/mapped_binding.py - tensorcast/types.py - proto/tensorcast/daemon/v2/store_daemon.proto diff --git a/docs/designs/0120-artifact-centered-model-runtime-realization.md b/docs/designs/0120-artifact-centered-model-runtime-realization.md index 033e1b8d..9563974f 100644 --- a/docs/designs/0120-artifact-centered-model-runtime-realization.md +++ b/docs/designs/0120-artifact-centered-model-runtime-realization.md @@ -4,7 +4,7 @@ title: Artifact-Centered Model Runtime Realization status: draft areas: ["sdk", "serving", "daemon", "core", "integrations", "docs", "tests"] created: 2026-05-23 -last_updated: 2026-05-25 +last_updated: 2026-05-26 related_code: - docs/designs/0039-artifact-first-sdk.md - docs/designs/0078-selection-first-artifact-retrieval.md @@ -24,15 +24,14 @@ related_code: - tensorcast/api/store/artifact.py - tensorcast/api/store/binding.py - tensorcast/types.py - - tensorcast/serving/runtime.py - - tensorcast/serving/config.py - - tensorcast/serving/policy.py - - tensorcast/serving/hosts.py - - tensorcast/serving/binding_plan.py - - tensorcast/serving/retained_binding.py - - tensorcast/serving/runtime_attachment.py - - tensorcast/serving/replica_publication.py - - tensorcast/serving/_runtime_impl/lifecycle.py + - tensorcast/artifact_runtime/lifecycle.py + - tensorcast/artifact_runtime/config.py + - tensorcast/artifact_runtime/policy.py + - 
tensorcast/artifact_runtime/host.py + - tensorcast/artifact_runtime/attachment.py + - tensorcast/artifact_runtime/binding/retained.py + - tensorcast/artifact_runtime/publication/replica.py + - tensorcast/artifact_runtime/recipe/ links: plan: ../plans/0120-artifact-centered-model-runtime-realization.md dependencies: @@ -94,7 +93,7 @@ The decision is: model-runtime stacks; - preserve the current vLLM scenario semantics, fastest compatible data path, retained memory-credit timing, and zero-extra-weight-residency behavior even - when TensorCast and vllm APIs are changed incompatibly. + when TensorCast and vLLM APIs are changed incompatibly. ```mermaid flowchart LR @@ -156,7 +155,7 @@ The plan paired with this design owns current code status, phase tracking, implementation gaps, and rollout order. This design should not be read as an implementation snapshot. -The vllm baseline remains important only as a regression contract: +The serving-runtime baseline remains important only as a regression contract: - vLLM behavior is a regression baseline; - runtime attachment, retained acquire, reload, runtime view, publication, and @@ -231,7 +230,7 @@ serialized handoff is a retained binding claim/capability for a realization target. It is not user-facing preload vocabulary and should not remain a separate serving materialization family. -## vllm cleanup baseline +## Serving runtime cleanup baseline Kept as current behavior baseline, not as an API compatibility boundary. Its `ServingRuntimeSession`, `RuntimeAttachment`, retained acquire, runtime view, @@ -284,7 +283,7 @@ publication, or diagnostics. 
| `tensorcast.serving` | internal serving ABI helpers, optional private lowerings, builder/publication implementation details while they remain serving-ABI-specific | public runtime session root, public locator authority, independent retained acquire model, independent diagnostics/report model | | Store Daemon | binding values, leases, mounted-source attestation, local realization ownership, PID/session safety, device-local movement | framework construction/finalize hooks, durable metadata authority | | Global Store | durable artifact metadata, replica metadata, coordination records, publication visibility | SDK direct control path, process-local attachment state | -| vllm `vllm.tensorcast.*` | runtime host capability construction, vLLM placement/source/collective facts, model construction/finalize hooks, reload/publication calls | TensorCast artifact selection authority, daemon lease authority, duplicate vllm session model | +| vLLM `vllm.tensorcast.*` | runtime host capability construction, vLLM placement/source/collective facts, model construction/finalize hooks, reload/publication calls | TensorCast artifact selection authority, daemon lease authority, duplicate serving runtime session model | The intended end state is one public root and one professional framework boundary. If a serving-named object remains after migration, it must satisfy one @@ -443,7 +442,7 @@ state. - acceptable as package namespace during migration; - acceptable in profile names such as `serving_abi_version` when the payload is - specifically the model-vllm ABI; + specifically the model-serving runtime ABI; - not acceptable as a second root for artifact identity, source discovery, P2P routing, or publication. @@ -629,9 +628,9 @@ parallel TensorDict or source subsystem: current publication state, swap the active binding value, and project the reload response from the new attachment. 
-## vllm target integration flow +## vLLM target integration flow -vllm should become a client of the artifact-runtime professional API, +vLLM should become a client of the artifact-runtime professional API, not a client of a public TensorCast serving session. The expected final flow is: 1. `model_loader_extra_config` is normalized into an artifact/runtime request: @@ -659,7 +658,7 @@ not a client of a public TensorCast serving session. The expected final flow is: local-ready durable promotion call artifact-runtime publication actions. They do not call a separate serving publication authority. -| vllm concern | Target TensorCast interaction | Decision logic | +| vLLM concern | Target TensorCast interaction | Decision logic | | --- | --- | --- | | loader startup | `Artifact.realize(... model_runtime ..., runtime_host=...)` | model loading is a realization of an artifact selection | | placement/topology facts | `ArtifactRealizationSpec` plus runtime host admission facts | topology affects realization/admission, not artifact identity | @@ -671,7 +670,7 @@ not a client of a public TensorCast serving session. The expected final flow is: | publication/shutdown | handle or attachment publication actions with active-generation CAS | publication is artifact replica lifecycle | | main/draft models | target-set transaction or documented sequential semantics | partial reload behavior must be explicit | -At the end of migration, vllm should not import public +At the end of migration, vLLM should not import public `tensorcast.serving.*` session/config/retained/publication APIs for normal startup, reload, memory accounting, runtime view, or shutdown. Any remaining serving import must be either an internal implementation dependency with no @@ -786,7 +785,7 @@ These objects should not become the everyday user API. The following names are the preferred long-term conceptual direction. 
They do not require compatibility aliases; the implementation may rename or reshape the -current vllm interfaces directly once the vLLM scenario matrix is +current serving-runtime interfaces directly once the vLLM scenario matrix is covered. When a replacement is wired and tested, the old public name should be deleted or narrowed to an internal implementation name; it should not remain as a parallel compatibility path. @@ -1042,7 +1041,7 @@ artifact-realization stack: - The final `tensorcast.serving` module is shallow: normal startup, reload, retained memory credit, runtime view, and shutdown/publication do not require public serving-session/config/retained/publication APIs. -- vllm normal paths use the direct artifact-runtime API and runtime +- vLLM normal paths use the direct artifact-runtime API and runtime host capabilities; remaining serving imports are private implementation or serving-ABI-specific builder paths with owners. - Retained pre-admission credit, mounted-source bootstrap, active-generation @@ -1091,10 +1090,10 @@ the same migration window instead of kept as permanent compatibility aliases. 
- `docs/designs/0114-collective-first-binding-realization-for-tp-serving-startup.md` - `docs/designs/0116-prefetch-serving-binding-target.md` - `docs/architecture/p2p-transfer-strategies.md` -- `/opt/vllm/vllm/tensorcast/loader.py` -- `/opt/vllm/vllm/tensorcast/placement.py` -- `/opt/vllm/vllm/tensorcast/source.py` -- `/opt/vllm/vllm/tensorcast/collective.py` -- `/opt/vllm/vllm/tensorcast/adapter.py` -- `/opt/vllm/vllm/model_executor/model_loader/memory_accounting.py` -- `/opt/vllm/vllm/v1/worker/gpu_model_runner.py` +- `vllm/tensorcast/loader.py` +- `vllm/tensorcast/placement.py` +- `vllm/tensorcast/source.py` +- `vllm/tensorcast/collective.py` +- `vllm/tensorcast/adapter.py` +- `vllm/model_executor/model_loader/memory_accounting.py` +- `vllm/v1/worker/gpu_model_runner.py` diff --git a/docs/guides/steptron-vllm-binding-integration.md b/docs/guides/steptron-vllm-binding-integration.md index 397ad284..82159362 100644 --- a/docs/guides/steptron-vllm-binding-integration.md +++ b/docs/guides/steptron-vllm-binding-integration.md @@ -241,8 +241,8 @@ TensorCast must own: - contributor liveness through the existing lease/guard/finalizer runtime - final source `seal_assembly(...)` - source immutable version-key publication in the current dependency-ready wave -- optional source -> serving builder or publisher only in the successor wave - after typed child closeout contracts exist +- optional source -> runtime-artifact representation builder or publisher only + in the successor wave after typed child closeout contracts exist - final serving-key or serving-manifest publication only in that successor wave `steptron` should not: diff --git a/docs/internals/model-loading.md b/docs/internals/model-loading.md index 35ea377f..706e0bad 100644 --- a/docs/internals/model-loading.md +++ b/docs/internals/model-loading.md @@ -166,13 +166,14 @@ region-backed data plane: current bound layout once the local overwrite succeeds. 
This publish path is the ordinary artifact-backed replica path from `0084`. It -is not the serving-artifact publication or `representation_publish` closeout -path used by source-to-serving builder work. +is not the runtime-artifact publication or `representation_publish` closeout +path used by representation-publication builder work. -## Serving-Artifact Runtime Preflight +## Runtime-Artifact Preflight -When runtime consumes a serving artifact, TensorCast now performs a serving -artifact preflight before accepting it into the steady-state loading path. +When runtime consumes an artifact with serving-manifest ABI metadata, +TensorCast performs a runtime-artifact preflight before accepting it into the +steady-state loading path. Phase-1 rules: @@ -180,12 +181,12 @@ Phase-1 rules: `tensor:__tensorcast_meta__.manifest_json` - artifacts without that reserved manifest tensor continue to load as ordinary non-serving artifacts -- strict serving runtime is now explicit rather than inferred from every +- runtime-artifact policy is explicit rather than inferred from every generic materialization request: - `PublishedModelVersion.require_serving_runtime_policy()`, + `PublishedModelVersion.require_runtime_artifact_policy()`, `RepresentationPublishContract.to_runtime_policy()`, and - `ServingArtifactManifest.to_runtime_policy()` produce a - `ServingRuntimePolicy` that callers can pass into + `RuntimeArtifactManifest.to_runtime_policy()` produce a + `RuntimeArtifactPolicy` that callers can pass as `runtime_artifact_policy` to `artifact.bind(...)`, `artifact.bind_into(...)`, and `binding.swap(...)` - artifacts with that reserved manifest tensor must pass: - manifest JSON parseability @@ -196,7 +197,7 @@ Phase-1 rules: `serving_build_digest`, `tensor_schema_hash`, `builder_mode`, and `build_pipeline_version` - `serving_manifest_ref` agreement between the manifest and the runtime - policy when strict serving runtime is requested + policy when runtime-artifact policy is requested - 
canonical tensor count equality between manifest and canonical index - tensor schema hash equality between manifest and the canonical index with the reserved manifest tensor excluded @@ -207,22 +208,22 @@ Current daemon coverage: - `MaterializeIntoTarget` - source-bound owned-binding create/refill paths -This keeps serving-artifact publication-time validation and runtime acceptance +This keeps runtime-artifact publication-time validation and runtime acceptance validation on the same contract, so runtime no longer silently accepts a -manifest-bearing serving artifact whose self-description is inconsistent with -its canonical tensor layout. +manifest-bearing artifact whose self-description is inconsistent with its +canonical tensor layout. Important distinction: - generic artifact load remains fail-open for ordinary non-serving artifacts -- strict serving runtime is opt-in through `ServingRuntimePolicy` -- this lets serving startup and reload fail closed without turning the whole - artifact runtime into a serving-only surface +- runtime-artifact preflight is opt-in through `RuntimeArtifactPolicy` +- this lets model-runtime startup and reload fail closed without turning the + whole artifact runtime into a serving-only surface -### Serving-Builder Guardrails +### Runtime Recipe Builder Guardrails -The Python serving builder keeps artifact identity as the source authority for -compiled serving recipes: +The Python runtime recipe builder keeps artifact identity as the source +authority for compiled runtime recipes: - `SourceCatalog.source_artifact_ref` must be a real artifact identity. 
The builder accepts `mi2` content identities and daemon-attested `msa1` mounted diff --git a/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md b/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md index da383398..4ca45c2b 100644 --- a/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md +++ b/docs/plans/0113-example-tp-model-closure-and-sot-convergence.md @@ -73,9 +73,9 @@ Execution policy for this plan: evidence, mixed-residual policy, and prototype deletion remain open: - `core/store/replica/collective_disk_loader.cc` - `0111` repo-owned builder/publication bridge is landed at base scope: - - `tensorcast/api/store/serving_builder.py` + - `tensorcast/api/store/publication_builder.py` - `daemon/service/controllers/assembly_operation_service.cc` -- `ServingAdmissionFacts.same_binding_fast_path_validated` is already a correctness and +- `RuntimeAdmissionFacts.same_binding_fast_path_validated` is already a correctness and admission gate for same-binding publication: - `tensorcast/types.py` - `docs/designs/0111-source-to-serving-builder-and-representation-publication.md` diff --git a/docs/plans/0116-prefetch-serving-binding-target.md b/docs/plans/0116-prefetch-serving-binding-target.md index ae1335cf..4cf0f87c 100644 --- a/docs/plans/0116-prefetch-serving-binding-target.md +++ b/docs/plans/0116-prefetch-serving-binding-target.md @@ -14,7 +14,7 @@ related_code: - proto/tensorcast/config/v1/daemon_config.proto - tensorcast/api/store/artifact.py - tensorcast/api/store/owned_binding_slot.py - - tensorcast/api/store/serving_binding_reference_consumer.py + - tensorcast/api/store/runtime_realization_reference_consumer.py - tensorcast/api/store/serving_binding_spec_cache.py - tensorcast/types.py - daemon/service/controllers/materialization_controller.cc @@ -71,7 +71,7 @@ Updated 2026-05-11. 
## P0: Stabilize The Public Example -- [ ] Convert `examples/serving_binding_consumer/` into the canonical +- [ ] Convert `examples/runtime_realization_reference_consumer/` into the canonical TensorCast-side serving binding example. - [ ] Document the parent-to-worker handoff payload: `ServingBindingTarget`, `PrefetchedServingBinding`, expected digests, and @@ -157,9 +157,9 @@ bazel test //daemon:grpc_service_impl_operation_rpc_test \ --ui_event_filters=warning,error source .venv/bin/activate -pytest tests/python/api/test_serving_binding_reference_consumer.py \ +pytest tests/python/api/test_runtime_realization_reference_consumer.py \ tests/python/api/test_serving_binding_spec_cache.py \ - tests/python/api/test_prefetch_serving_binding_target.py \ + tests/python/api/test_runtime_realization_target.py \ tests/python/api/test_operation_semantics.py -q pytest tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py -q diff --git a/docs/plans/0120-artifact-centered-model-runtime-realization.md b/docs/plans/0120-artifact-centered-model-runtime-realization.md deleted file mode 100644 index 5cf459c1..00000000 --- a/docs/plans/0120-artifact-centered-model-runtime-realization.md +++ /dev/null @@ -1,537 +0,0 @@ ---- -slug: artifact-centered-model-runtime-realization -title: Artifact-Centered Model Runtime Realization Plan -status: draft -areas: ["sdk", "serving", "daemon", "core", "integrations", "docs", "tests"] -created: 2026-05-23 -last_updated: 2026-05-25 -related_code: - - docs/designs/0120-artifact-centered-model-runtime-realization.md - - docs/designs/0121-unified-artifact-realization-kernel.md - - docs/plans/0121-unified-artifact-realization-kernel.md - - docs/designs/0116-prefetch-serving-binding-target.md - - docs/plans/0116-prefetch-serving-binding-target.md - - tensorcast/api/store/artifact.py - - tensorcast/types.py - - tensorcast/serving/runtime.py - - tensorcast/serving/config.py - - tensorcast/serving/policy.py - - tensorcast/serving/hosts.py - - 
tensorcast/serving/binding_plan.py - - tensorcast/serving/retained_binding.py - - tensorcast/serving/runtime_attachment.py - - tensorcast/serving/replica_publication.py - - tensorcast/serving/_runtime_impl/lifecycle.py -links: - design: ../designs/0120-artifact-centered-model-runtime-realization.md - dependencies: - - ../designs/0121-unified-artifact-realization-kernel.md ---- - -# Objective - -Plan the successor work from the current vllm baseline: move -TensorCast TensorDict retrieval, binding, prefetch, and model-runtime loading -from parallel surfaces toward one artifact-centered realization model while -preserving all vLLM behavior and performance-sensitive semantics. - -No source compatibility guarantee is required. This plan is delete-forward: -replacement artifact-runtime paths should absorb behavior, prove equivalence -with focused tests, and then remove or internalize old serving-rooted public -entrypoints, compatibility adapters, duplicate diagnostics, and redundant tests. -Semantic regression is not allowed; maintaining two long-term stacks is also not -allowed. - -# Current State & Grounding - -The current vllm baseline is implemented and folded into `0120` as -context. It is not a separate long-term public model. -- `tensorcast.serving.runtime` is the narrow framework-facing runtime API. -- `tensorcast.serving.config` selects exactly one startup plan. -- `tensorcast.serving.policy` owns serving locator and policy normalization. -- `tensorcast.serving.binding_plan` centralizes trace/recipe/spec/layout/schema - identity. -- `tensorcast.serving.retained_binding` owns retained acquire validation, - reservation bytes, lease restore, and runtime ownership transfer. -- `tensorcast.serving.runtime_attachment` owns process-local attachment state. -- `tensorcast.serving.replica_publication` owns artifact-backed runtime replica - publication and retirement. -- `tensorcast.serving._runtime_impl.lifecycle` still performs most orchestration. 
- -The Store SDK baseline has moved from target model to implemented kernel -baseline: - -- `Artifact.tensor_dict(...)`, `tensor_dict_with_diagnostics(...)`, - `tensor_dict_into(...)`, `tensor_into(...)`, `bind(...)`, and `bind_into(...)` - lower through `Artifact.realize(...)`. -- `Artifact.prefetch(device=...)` and `Artifact.prefetch(target=...)` lower - through `Artifact.realize_async(...)` for retained replica, retained binding, - and target-set operation semantics. -- `ArtifactRealizationSpec`, `ArtifactRealizationHandle`, - `ArtifactRealizationReport`, `RealizationResourceEnvelope`, - `RealizationReleaseContract`, target-set reports, mounted-source reports, - runtime-attachment reports, model-runtime report wrappers, and publication - reports are implemented and exported from the Store SDK. -- Direct `Artifact.realize(ArtifactRealizationSpec.model_runtime(...))` still - fails closed. Serving lifecycle code creates runtime-attachment and - model-runtime handles internally while vllm still enters through - `ServingRuntimeSession`. -- Daemon materialization already has the key performance primitives the target - API must preserve: `MaterializeReplica` tries the artifact LIP/local-replica - fast path before engine-backed materialization, binds CUDA IPC or CPU memfd - leases into the response, and reports the selected materialization source. -- Binding materialization already attempts direct byte-space planning for - compatible mapped source artifacts and exposes source-bound plan diagnostics; - direct model-runtime realization should reuse this behavior rather than - routing through Python TensorDict materialization. -- Python materialization reconstructs tensor views from daemon CUDA IPC or memfd - handles and records IPC/restore timings. Those views are acceptable - projections but must not become a required intermediate for vLLM weight - loading. 
- -Current gaps against the `0120` target state: - -- Top-level `tensorcast` does not yet expose `ArtifactRealizationSpec`, - `ArtifactRealizationHandle`, or `ArtifactRealizationReport`, even though the - target design treats them as public SDK peers of `Artifact`. -- Direct model-runtime realization still returns `UNIMPLEMENTED` from - `Artifact.realize(...)`; runtime attachment lowering is only reachable through - serving lifecycle code. -- `ArtifactRealizationHandle.attach(...)` currently exists as a delegation hook, - but the target design needs completed-handle runtime attachment projection - semantics. The implementation must either add `attachment()` or make - `attach(...)` explicitly projection-only. -- Runtime host capabilities exist under serving host/integration names - (`IntegrationHost`, framework/tensor-surface/placement/source/collective - protocols). They need to become the artifact-runtime professional boundary - instead of a serving-session dependency. -- `ArtifactRealizationSpec` still carries serving-rooted fields such as - `serving_runtime_policy`; those fields need a neutral target/profile policy or - a binding-specific options object. -- Retained pre-admission memory credit is still exposed through - retained-serving-binding helpers. A neutral retained realization claim wrapper - must absorb that behavior before vLLM migration. -- Publication generation, active-generation checks, replay, reload rejection, - and shutdown retirement already exist in serving publication code but are not - yet formalized as the shared artifact-runtime publication/CAS contract. -- A fake second framework already proves the vllm is not purely - vLLM-shaped, but the proof still enters through `ServingRuntimeSession`; it - must be repeated on the direct artifact-runtime API. -- Serving-rooted public DTOs and helpers remain broadly exported. They must be - removed or internalized after replacement, not kept as compatibility aliases. 
- -The plan no longer needs to converge the main SDK paths before broad serving -cleanup; `0121` did that. This plan now tracks the remaining naming/API boundary -work after the kernel convergence. - -The concrete kernel work is tracked in -[`0121-unified-artifact-realization-kernel`](../designs/0121-unified-artifact-realization-kernel.md). -This `0120` plan remains the umbrella migration plan; `0121` owns the -anti-split-brain implementation sequence for selection, target, strategy, -representation, lifecycle, execution lowering, reports, and TP target sets. - -Execution order: - -1. Treat the implemented `0121` kernel as the baseline. -2. Expose the public realization symbols at the intended package level and make - the docs/examples use the real target API shape. -3. Finish direct public/professional model-runtime realization instead of routing - framework integrations through serving-named session APIs. -4. Port vllm in the same execution window as TensorCast API changes. -5. Delete or internalize serving-centered public names, compatibility wrappers, - redundant diagnostics, and duplicate tests once replacements are wired. -6. Prove the boundary with SGLang or a minimal second runtime adapter through the - direct artifact-runtime API. - -The vllm baseline depends on these surfaces: - -- `vllm/tensorcast/loader.py`: session start, attachment storage, in-place - reload, replica publication, local-ready durable promotion. -- `vllm/tensorcast/placement.py`: TP/PP/DP member identity, EP/EPLB digests, - materialization execution facts. -- `vllm/tensorcast/source.py`: local source catalog and cache policy. -- `vllm/tensorcast/collective.py`: same-node source coordination and local-ready - TP barrier. -- `vllm/tensorcast/adapter.py`: meta/runtime model construction, trace capture, - tensor attach/finalize, runtime-only tensors, semantic probes. 
-- `vllm/model_executor/model_loader/memory_accounting.py`: trusted retained - reservation bytes before vLLM memory admission. -- `vllm/v1/worker/gpu_model_runner.py`: reload endpoint, runtime view, - shutdown retirement, EP/EPLB reload safety. - -Feasibility result for the current scenario: - -- vllm can adapt cleanly to the artifact-centered model because its - TensorCast loader is already concentrated behind model loader, placement, - source, adapter, and worker reload/publication surfaces; -- vLLM does not need a direct TensorCast TensorDict model-loading API, and - TensorDict is now a first-class projection of the shared realization kernel; -- retained reservation credit before vLLM memory admission is the main timing - constraint and must be represented before any runtime attachment exists; -- weight loading must keep the current best data path: retained acquire, - local-replica/LIP, compatible P2P, local mounted-source/disk streaming, or - explicit transform. The direct artifact-runtime API must not introduce a - TensorDict or Python state-dict intermediate; -- local HF/safetensors bootstrap is feasible through daemon-attested mounted - source subjects such as `msa1:...`, not as a vLLM-owned source authority; -- reload/publication semantics are implemented in serving lifecycle and remain a - migration constraint for the public artifact-runtime API; -- EP/EPLB reload safety must combine static semantic digests with live framework - checks from vLLM before reload; -- main and draft TensorCast model reload must be made explicit as either a - target-set transaction or the current sequential main-then-draft behavior with - unhealthy marking on partial failure. - -# Migration Decision Logic - -Every migration change should classify the touched concept before renaming or -deleting it. Use this order: - -1. If it is durable identity, discovery, routing, replica visibility, or - lifecycle, move it to artifact selection or artifact replica metadata. -2. 
If it is target/device/member/layout/strategy/admission intent, move it to - `ArtifactRealizationSpec`, target plans, strategy plans, representation - admission, or target-set realization. -3. If it is framework construction, trace capture, tensor surface, - runtime-only tensor handling, finalize hooks, placement facts, source - catalog, collective behavior, or live EP/EPLB checks, move it to the runtime - host capability surface or `RuntimeAttachment`. -4. If it exists to credit memory or acquire a prepared value later, move it to - retained realization claim or prefetch handoff naming. -5. If it creates a reusable source for later loads/P2P, move it to - artifact-runtime publication/promote actions. -6. If the name is serving-rooted only for source compatibility, delete or - internalize it once the replacement behavior and tests exist. -7. If a serving name truly describes a model-serving ABI payload, it may remain, - but only as a profile/ABI field or private implementation detail. - -The default decision is not "rename everything first." The default is: -classify behavior, wire the artifact-runtime replacement, prove behavior with -tests, then delete or internalize the old public serving surface in the same -cleanup window. A phase is incomplete if old and new public paths both remain as -supported peer entrypoints. - -# vllm Migration Slice - -TensorCast and vllm changes should land in one coordinated window -because both sides are under our control and no source compatibility guarantee is -required. - -| vllm owner | Current TensorCast dependency | Target interaction | Completion signal | -| --- | --- | --- | --- | -| `vllm/tensorcast/loader.py` | `ServingConfig`, `IntegrationHost`, `ServingRuntimeSession`, `RuntimeAttachment` | build artifact/runtime request, call direct `Artifact.realize(... 
model_runtime ..., runtime_host=...)`, store `handle.attachment()` | startup, reload, required-publication, and local-ready promotion smoke tests no longer instantiate `ServingRuntimeSession` | -| `vllm/tensorcast/host.py` | `tensorcast.serving.hosts.IntegrationHost` | construct `RuntimeHostCapabilities` or transitional alias with deletion trigger | host construction has no public serving-session dependency | -| `vllm/tensorcast/adapter.py` | serving host/tensor-surface DTOs | framework capability implementation for construction, trace, runtime-only tensors, finalize hooks, semantic probes | adapter tests pass through direct artifact-runtime handle | -| `vllm/tensorcast/placement.py` | serving placement/local-ready DTOs | target/member/admission facts plus publication context for artifact-runtime actions | placement no longer creates public serving targets for normal runtime startup | -| `vllm/tensorcast/source.py` | `ServingConfig` and serving source catalog | mounted-source or durable artifact selection input | local HF/safetensors cold start admits an `msa1:` subject before planning | -| `vllm/tensorcast/collective.py` | serving-local collective coordination | realization strategy and target-set coordination facts | TP same-node startup uses shared target-set strategy reports | -| `vllm/tensorcast/retained_binding.py` | retained-serving-binding helpers | retained realization claim helpers | retained startup validates claim through neutral naming | -| `vllm/model_executor/model_loader/memory_accounting.py` | `tensorcast.serving.retained_binding` trusted bytes | retained realization claim trusted reservation bytes | memory credit still occurs before vLLM admission without public serving imports | -| `vllm/tensorcast/runtime_view.py` and `gpu_model_runner.py` | vllm view, session shutdown retirement, serving policy helpers | runtime attachment/view projection and artifact-runtime retirement actions | runtime view, reload, and shutdown tests use artifact-runtime 
actions | -| `vllm/tensorcast/builder/*` | serving builder/publication helpers | keep only if the payload is serving-ABI-specific; otherwise move to artifact publication actions | remaining builder imports are documented as internal/offline ABI-specific paths | - -# Performance Migration Gates - -The migration is not complete until the direct artifact-runtime path proves that -the user-facing API change did not move model loading onto a slower or larger -path. - -| Gate | What to prove | Concrete check | -| --- | --- | --- | -| No TensorDict intermediate | `Artifact.realize(... model_runtime ...)` attaches a binding/retained value directly instead of first calling TensorDict materialization. | Direct API tests and vllm startup tests fail if normal model-runtime startup calls TensorDict projection helpers, Python builder materializers, or full state-dict loaders. | -| Fast source selection preserved | Retained, local replica/LIP, P2P, disk, mounted-source/direct-write, and explicit-transform cases report the expected selected source and fallback status. | Artifact-realization reports assert source kind, fallback reason bucket, copy bytes, temporary bytes, retained bytes, and direct-write bytes for each representative path. | -| No extra GPU weight residency | Steady-state runtime attach owns one TensorCast weight residency plus framework runtime-only tensors; direct API migration does not keep both serving and artifact-runtime owners. | vLLM smoke/profile captures CUDA allocated/reserved deltas around startup and reload, checks `_vllm_external_weight_bytes`/retained credit, and verifies old attachment/binding handles are retired. | -| No full host-memory staging | Normal durable, retained, and mounted-source startup do not build a full Python `dict[str, torch.Tensor]`, full safetensors state dict, or full CPU copy of weights. | RSS/profile events and call-site audit keep full host materialization limited to explicit offline builder workflows. 
| -| Admission timing preserved | Retained reservation bytes are credited before vLLM calculates requested KV/cache memory. | `memory_accounting.py` tests use the neutral claim helper and assert credit before `gpu_worker` startup admission. | -| Latency remains stage-local | Direct artifact-runtime start adds no extra data-plane RPC, session start, retained acquire, or IPC restore beyond the chosen source strategy. | Profile events compare current serving baseline and direct API for startup, IPC open, attach/finalize, source-bound plan, reload, and publication stages; any added stage needs an explicit reason in the report. | -| Reload overlap bounded | In-place reload may temporarily overlap old and new weights only under declared swap semantics. | Reload tests assert active-generation CAS, stale publication retirement, and resource-envelope overlap accounting. | - -These gates are intentionally behavior-based. A rename can pass only when the -resolved source, memory ownership, and timing shape match the current optimal -path for the same compatibility class. - -# Phases & Milestones - -- [x] Phase 1: Freeze The Current vllm Baseline - - [x] Remove the standalone serving-centered design and fold baseline context - into `0120`. - - [x] Record the current vllm code/module state and mark behavior - contracts versus temporary names. - - [x] Capture the vLLM scenario matrix with owner files and expected behavior - in the design. - - [x] Record that current vLLM does not use TensorCast TensorDict as its steady - model-loading path; TensorDict is the equivalence proof for shared - realization semantics. - - [x] Verify no Python SDK artifact metadata or realization path added direct - Global Store access; `0121` guardrails now cover this. 
- -- [x] Phase 2: Land The Shared Realization Kernel Through `0121` - - [x] Define and export `ArtifactRealizationSpec`, - `ArtifactRealizationHandle`, `ArtifactRealizationReport`, selection, - target, strategy, representation, lifecycle, resource-envelope, - release-contract, and report DTOs. - - [x] Lower `Artifact.tensor_dict(...)`, - `tensor_dict_with_diagnostics(...)`, `tensor_dict_into(...)`, - `tensor_into(...)`, `bind(...)`, and `bind_into(...)` through - `Artifact.realize(...)`. - - [x] Lower retained replica, retained binding, and target-set prefetch through - `Artifact.realize_async(...)` while preserving `Operation[T]`. - - [x] Add TensorDict projection ownership and release-contract lifecycle - coverage. - - [x] Add retained binding/target-set reports, mounted-source realization, - runtime-attachment reports, model-runtime report wrappers, and - publication reports. - - [x] Add direct Global Store guardrails for SDK artifact metadata and - realization paths. - -- [x] Phase 3: Preserve TP, P2P, Publication, And Runtime-Attachment Correctness - - [x] Represent TP as target-set realization with member-local layouts and - source-selection modes. - - [x] Keep same-node collective-first as strategy-plane state. - - [x] Keep P2P direct reuse gated by compatible - representation/topology/member/layout/schema. - - [x] Route runtime attachment, retained acquire close, and publication - projection close through realization release contracts. - - [x] Preserve source coordination, local-ready barrier, active-generation - publication, stale-publication retirement, and shutdown retirement - semantics. - -- [ ] Phase 4: Expose Direct Model-Runtime Realization - - [ ] Export `ArtifactRealizationSpec`, `ArtifactRealizationHandle`, and - `ArtifactRealizationReport` from the intended public SDK package level - and add import smoke tests. 
- - [ ] Adopt direct - `Artifact.realize(ArtifactRealizationSpec.model_runtime(...), runtime_host=...)` - or an equivalent artifact-rooted signature as the professional framework - API. Do not introduce a new public artifact-runtime session facade. - - [ ] Lower model-runtime realization through the existing runtime attachment - implementation instead of returning `UNIMPLEMENTED`. - - [ ] Ensure that lowering calls the daemon binding/retained/source-bound - paths directly. Direct model-runtime realization must not first call - TensorDict projection helpers, materialize a Python state dict, or start - a second public serving session. - - [ ] Define the runtime host capability surface for construction, trace - capture, tensor surface, runtime-only tensors, finalize hooks, placement - facts, source catalog, collective behavior, semantic probes, reload - admission, and live EP/EPLB checks. - - [ ] Define completed-handle projection semantics: add - `ArtifactRealizationHandle.attachment()` or make `attach(...)` - explicitly projection-only, not a second execution step. - - [ ] Preserve `RuntimeAttachment` as the process-local framework boundary and - keep model object state out of `Artifact`. - - [ ] Add tests that direct model-runtime handle reports match - serving-lifecycle model-runtime reports. - - [ ] Add report assertions for selected source kind, fallback reason, - copy bytes, temporary bytes, retained bytes, direct-write bytes, - IPC-open timing, and attach/finalize timing. - - [ ] Update public examples to use the actual target API shape and current - binding arguments (`mapping` / target-plan DTOs), not stale `layout=...` - placeholders. - -- [ ] Phase 5: Migrate vllm to the successor boundary - - [ ] Port `TensorcastModelLoader` startup from serving session naming to the - successor artifact-runtime API. 
- - [ ] Port `vllm/tensorcast/host.py` from public `IntegrationHost` - construction to `RuntimeHostCapabilities` construction or a transitional - alias with an explicit deletion trigger. - - [ ] Preserve retained reservation byte credit before vLLM admission through - retained realization claim validation. - - [ ] Introduce neutral retained realization claim helpers and migrate vLLM - memory accounting off retained-serving-binding public helpers. - - [ ] Add vLLM memory-admission tests proving retained credit is applied - before startup admission and is not double-counted after acquire. - - [ ] Port vLLM source bootstrap to the mounted-source artifact contract and - keep `msa1:`/durable artifact admission explicit. - - [ ] Keep durable, retained, and local-source startup off TensorDict and full - Python state-dict paths; direct API startup should attach daemon-owned - tensors through the selected binding/retained/source-bound path. - - [ ] Keep `VLLMTensorcastAdapter` as the owner for model construction, - trace capture, runtime-only tensor rehydration, finalize hooks, and - semantic probes. - - [ ] Add vllm profile/smoke coverage for CUDA allocated/reserved - deltas, host RSS deltas, selected source kind, attach/finalize timing, - reload overlap, and old-handle retirement. - - [ ] Preserve in-place reload response projection, stale/duplicate reload - handling, after-ready publication, required-publication failure state, - stale publication retirement, shutdown retirement, EP/EPLB reload - safety, and drafter sequential failure/unhealthy behavior. - - [ ] Remove normal vllm startup, reload, memory-accounting, - runtime-view, and shutdown imports of public `tensorcast.serving.*` - APIs after replacement paths pass. 
- -- [ ] Phase 6: Narrow Serving-Centered Names - - [ ] Classify `serving.artifact_locator`, `serving.policy`, `bootstrap.*`, - `materialization.collective`, `retained_binding_acquire.*`, - `replica_publication.*`, and `diagnostics.*` into artifact selection, - representation preflight, source artifact bootstrap, realization - strategy, retained claim, publication policy, and diagnostics fields. - - [ ] Decide which names remain because they describe serving ABI semantics - and which move to artifact/runtime vocabulary. - - [ ] Rename or absorb serving-rooted DTOs such as `ServingBindingTarget`, - `PrefetchedServingBinding`, and `ServingArtifactManifest` only after - replacements exist and tests cover the replacement behavior. - - [ ] Move public docs toward artifact/runtime terminology. - - [ ] Remove broad facades that expose private lifecycle helpers as public API. - - [ ] Delete compatibility aliases and duplicate helper functions in the same - cleanup window; do not leave old and new public surfaces as peers. - - [ ] Rewrite or delete tests that primarily assert the old serving surface - instead of the artifact-runtime contract. - - [ ] Close every applicable entry in the deletion ledger below; Phase 6 is - not complete while any old public serving path remains as a supported - peer of the artifact-runtime path. - -- [ ] Phase 7: Extend To A Second Runtime - - [ ] Use SGLang or a minimal mock runtime adapter to prove the direct - artifact-runtime API is not vLLM-shaped. - - [ ] Confirm source catalog, target layout, runtime-only tensors, and - publication can be represented without vLLM-specific public names. - -# Deletion Ledger - -The migration is delete-forward. Each old surface below must either be removed -from the public API or narrowed to an explicitly internal implementation detail -after its replacement is wired. 
- -| Current surface | Replacement owner | Delete/internalize after | Guardrail | -| --- | --- | --- | --- | -| `ServingRuntimeSession` public runtime root | `Artifact.realize(... model_runtime ..., runtime_host=...)` plus completed `ArtifactRealizationHandle.attachment()` | vllm startup/reload/shutdown and second-runtime fixture use the direct API | import/call-site search shows no normal public startup path instantiates `ServingRuntimeSession`; smoke tests show no extra binding owner or TensorDict intermediate | -| `ServingConfig` as public runtime request | artifact/runtime request DTOs and profile policy fields | loader/source/reload paths parse the new request and preserve behavior | semantic field-map tests cover durable, source, retained, diagnostics, publication, and reload inputs | -| `serving_runtime_policy` on generic realization specs | neutral runtime profile/preflight policy or binding-specific options object | binding/runtime preflight no longer needs serving-rooted field names | spec construction tests use neutral field names; old field rejected or private | -| `ServingArtifactLocator` | artifact locator or artifact selection locator | durable startup and reload resolve through artifact selection | reload/startup tests assert artifact selection digest and no serving locator authority | -| `ServingBindingTarget` and `ServingBindingSetTarget` | `RealizationTarget` and `RealizationTargetSet` | retained prefetch, TP target-set, and direct runtime startup accept replacement targets | target-set tests cover member layout, source reuse, and collective strategy | -| `PrefetchedServingBinding` and `PrefetchedServingBindingSet` | `PrefetchHandoff` or `RetainedRealizationClaim` | vLLM retained acquire and memory accounting use neutral claim helpers | trusted reservation byte tests pass before admission; old public names absent from normal API | -| `RetainedServingBindingAuthority` and retained-serving helpers | retained realization claim parser/validator | 
acquire validation, lease restore, and reservation credit are represented neutrally | retained startup tests validate member/device/layout at credit and acquire time | -| serving publication helpers used by normal runtime startup | handle/attachment artifact-runtime publication actions | after-ready publication, reload retirement, shutdown retirement, and local-ready promotion use shared actions | generation/CAS tests cover stale result, duplicate reload, required-publication failure, and shutdown | -| `ServingArtifactManifest` for non-ABI metadata | runtime representation manifest or artifact representation metadata | manifest fields are reclassified into representation/runtime profile terms | preflight tests assert schema/build/topology/contract admission without public serving manifest authority | -| `serving_build_digest` when not serving-ABI-specific | `runtime_build_digest` or `representation_build_digest` | build identity is owned by representation/runtime profile | manifest/build tests explain any remaining serving ABI field | -| `ServingRealizationReport` and serving diagnostics aliases | `ArtifactRealizationReport` and target-specific report payloads | direct model-runtime reports match current serving lifecycle reports | diagnostics tests assert one report model and no duplicate path-specific assertions | -| old tests that assert public serving-session behavior | direct artifact-runtime tests plus private lowering tests where needed | replacement tests pass and internal lowerings are covered directly | test inventory has no compatibility-only public serving tests | - -# Tasks - -- Keep this plan as the post-`0121` migration ledger; do not duplicate the - completed `0121` kernel checklist. -- Add public SDK exports for the realization spec/handle/report symbols and - verify `import tensorcast as tc; tc.ArtifactRealizationSpec` works. 
-- Implement the direct model-runtime API path from `Artifact.realize(...)` to the - existing runtime attachment lowerings, preserving the current report and - release-contract behavior. -- Preserve the current optimal weight-loading data paths while changing the API: - retained acquire, local-replica/LIP, compatible P2P, local mounted-source/disk - streaming, and explicit transforms must remain distinguishable in reports. -- Define the completed-handle runtime attachment projection API and update tests - so `attach(...)`, if retained, is not treated as a separate execution step. -- Rename or wrap `IntegrationHost`-style serving host capabilities into the - artifact-runtime professional API without changing framework behavior. -- Move `serving_runtime_policy` out of the generic realization spec or fence it - behind a transitional binding/runtime-profile options object with a deletion - trigger. -- Build a current vllm import/call-site table showing every remaining - `ServingRuntimeSession`, `ServingConfig`, retained-binding, publication, and - runtime-view dependency. -- Create the semantic field map from current `model_loader_extra_config` to - artifact/runtime terminology. -- Define the retained realization claim public/professional naming path while - preserving current trusted reservation validation inputs. -- Formalize the existing publication generation/CAS contract shared by - after-ready publication, reload retirement, and shutdown retirement as the - vLLM migration acceptance rule. -- Add direct model-runtime realization tests once the public lowering exists. -- Add or update vllm smoke/integration tests for startup, retained - memory credit, local source cold start, durable artifact startup, in-place - reload, after-ready publication, shutdown retirement, EP/EPLB rejection, and - draft partial-failure/unhealthy behavior. 
-- Add vllm profile checks for CUDA allocated/reserved deltas, host RSS, - selected source kind, copy/temporary/direct-write bytes, attach/finalize - timing, reload overlap, and old-handle retirement. -- Add direct artifact-runtime second-framework tests or fixtures before retiring - serving-centered public vocabulary broadly. -- Delete old serving-session public tests, compatibility wrappers, and redundant - diagnostics assertions after the replacement tests pass; keep only tests that - exercise internal lowerings still intentionally owned by serving modules. -- Maintain the deletion ledger above as implementation work proceeds; every - temporary serving compatibility object must have a replacement, owner, - guardrail, and removal trigger. -- Add a vllm call-site search check before cleanup completion so - normal startup, reload, memory accounting, runtime view, and shutdown do not - import public `tensorcast.serving.*` APIs. - -# Test / Rollout / Recovery - -Validation now splits completed `0121` kernel guardrails from remaining `0120` -migration checks. 
- -Completed kernel guardrails: - -- `source .venv/bin/activate && pytest tests/python/api/test_realization_kernel.py` -- `source .venv/bin/activate && pytest tests/python/api/test_artifact_handle.py` -- `source .venv/bin/activate && pytest tests/python/api/test_prefetch_operation.py` -- `source .venv/bin/activate && pytest tests/python/test_serving_integration.py` -- `source .venv/bin/activate && pytest tests/python/test_serving_replica_publication.py` - -Remaining migration checks: - -- public SDK import/export smoke tests for `tc.ArtifactRealizationSpec`, - `tc.ArtifactRealizationHandle`, and `tc.ArtifactRealizationReport`; -- direct model-runtime realization tests once `Artifact.realize(model_runtime)` - no longer fails closed; -- completed-handle runtime attachment projection tests proving no second attach - execution path exists; -- vllm smoke/integration tests for startup, reload, publication, and - retained credit; -- performance migration gates for no TensorDict intermediate, source selection, - no extra GPU/host full-weight residency, pre-admission retained credit, stage - timing, and bounded reload overlap; -- direct artifact-runtime second-framework adapter proof before broad serving - vocabulary retirement; -- cleanup guardrails proving old public serving-session entrypoints, - compatibility aliases, duplicate diagnostics paths, and redundant tests are - deleted or internalized after replacement; -- deletion-ledger audit showing each old public serving surface is removed, - private, or serving-ABI-specific with an owner; -- vllm import/call-site audit proving normal startup, reload, - memory-accounting, runtime-view, and shutdown paths use artifact-runtime - APIs; -- C++ daemon/core tests only when proto, materialization, binding, or P2P - behavior changes. - -- Execute TensorCast and vllm changes together, because both codebases - are under our control. 
-- Recovery is behavior-based and delete-forward: if the new API shape is wrong, - revise the refactor before landing rather than preserving a parallel - compatibility layer. - -# Risks & Tracking - -- Artifact API overgrowth: track whether proposed methods are durable artifact - lifecycle operations or realization-handle projection/actions. -- TensorDict split-brain: track whether TensorDict tests still exercise a - separate materialization path that bypasses realization specs, strategy - selection, diagnostics, or P2P compatibility checks. -- vLLM timing regression: specifically track retained reservation memory credit - before admission. -- Weight-loading fast-path regression: track whether direct model-runtime - startup keeps retained acquire, local-replica/LIP, P2P, disk, mounted-source, - and explicit-transform cases on their intended paths. -- Hidden memory duplication: track GPU allocated/reserved deltas, host RSS, - live binding owners, reload overlap, and compatibility wrappers that keep old - handles alive after artifact-runtime attachment. -- Source authority split-brain: track whether local source bootstrap can run - without an admitted `msa1:` or durable artifact subject. -- Reload/publication race: track whether publication, reload, and shutdown - retirement compare the active attachment or binding-value generation before - mutating state. -- Drafter partial reload: track whether main/draft behavior is target-set atomic - or explicitly sequential with worker-unhealthy marking. -- TP/P2P semantic confusion: require explicit compatibility validation before - direct P2P member reuse. -- Hidden framework leakage: keep vLLM model attributes and finalize hooks inside - runtime adapters and attachments. -- Naming churn without semantic gain: do not rename a class until ownership is - clear and the vLLM migration path is documented. 
-- Dual-stack drift: every compatibility adapter or old public entrypoint must - have an owner, replacement, test coverage, and deletion trigger. A migration - phase is not complete while old and new public paths both remain supported. -- Redundant test drift: tests that assert old serving-session behavior must be - rewritten to assert artifact-runtime behavior or deleted after internal - lowering coverage exists. diff --git a/examples/runtime_realization_reference_consumer/README.md b/examples/runtime_realization_reference_consumer/README.md new file mode 100644 index 00000000..0536d79f --- /dev/null +++ b/examples/runtime_realization_reference_consumer/README.md @@ -0,0 +1,18 @@ +# Runtime Realization Reference Consumer + +This example is a minimal TensorCast-side consumer for the runtime realization +prefetch/acquire flow. It is intentionally independent of internal-vLLM so the +daemon API can be exercised as a public reference path. + +```bash +source .venv/bin/activate +python examples/runtime_realization_reference_consumer/reference_consumer.py \ + --daemon-address 127.0.0.1:8073 \ + --source-artifact-id mi2:<artifact-id> \ + --device-uuid <device-uuid> +``` + +The parent process writes a resolved realization target cache entry, calls the +daemon `PrefetchServingBinding` wire RPC, and launches a worker subprocess that +reconstructs a `RealizationTarget` plus `PrefetchHandoff`, calls +`AcquireBindingValue`, and releases the returned lease. diff --git a/examples/serving_binding_consumer/reference_consumer.py b/examples/runtime_realization_reference_consumer/reference_consumer.py similarity index 91% rename from examples/serving_binding_consumer/reference_consumer.py rename to examples/runtime_realization_reference_consumer/reference_consumer.py index a5abd881..48721a37 100644 --- a/examples/serving_binding_consumer/reference_consumer.py +++ b/examples/runtime_realization_reference_consumer/reference_consumer.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # Copyright (c) 2026, TensorCast Team. 
-"""Minimal serving-binding consumer for TensorCast prefetch/acquire flows.""" +"""Minimal runtime realization consumer for TensorCast prefetch/acquire flows.""" from __future__ import annotations @@ -14,7 +14,7 @@ from pathlib import Path from tensorcast.api.store import ( - ReferenceServingTensorSpec, + ReferenceRuntimeTensorSpec, acquire_reference_binding, build_reference_resolved_spec, prefetch_reference_binding, @@ -24,12 +24,12 @@ ) from tensorcast.daemon_ctl import DaemonCtl from tensorcast.proto.operation.v1 import operation_pb2 -from tensorcast.types import PrefetchedServingBinding, ServingBindingTarget +from tensorcast.types import PrefetchHandoff, RealizationTarget def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( - description="Run a minimal TensorCast serving-binding prefetch/acquire flow." + description="Run a minimal TensorCast runtime realization prefetch/acquire flow." ) parser.add_argument("--daemon-address", default="127.0.0.1:8073") parser.add_argument("--source-artifact-id", required=True) @@ -48,10 +48,10 @@ def _worker_main(args: argparse.Namespace) -> None: raise SystemExit("--target-path and --prefetched-path are required for worker") target_proto = operation_pb2.ServingBindingTarget() target_proto.ParseFromString(Path(args.target_path).read_bytes()) - target = ServingBindingTarget.from_proto(target_proto) + target = RealizationTarget.from_proto(target_proto) prefetched_proto = operation_pb2.PrefetchServingBindingResult() prefetched_proto.ParseFromString(Path(args.prefetched_path).read_bytes()) - prefetched = PrefetchedServingBinding.from_proto(prefetched_proto) + prefetched = PrefetchHandoff.from_proto(prefetched_proto) client = DaemonCtl(args.daemon_address) acquired = acquire_reference_binding( client, @@ -80,7 +80,7 @@ def _worker_main(args: argparse.Namespace) -> None: def _parent_main(args: argparse.Namespace) -> None: cache_root = Path(args.cache_root) if args.cache_root else Path(tempfile.mkdtemp()) client = 
DaemonCtl(args.daemon_address) - tensor = ReferenceServingTensorSpec( + tensor = ReferenceRuntimeTensorSpec( name=args.tensor_name, size_bytes=args.tensor_size_bytes, shape=(args.tensor_size_bytes,), diff --git a/examples/runtime_reference_framework/README.md b/examples/runtime_reference_framework/README.md new file mode 100644 index 00000000..7fe34935 --- /dev/null +++ b/examples/runtime_reference_framework/README.md @@ -0,0 +1,22 @@ +# Runtime Reference Framework + +This example is the smallest TensorCast Level 1 framework integration shape. +It consumes an existing durable artifact through +`Artifact.realize(... model_runtime ...)` and validates the integration with the +artifact-runtime conformance kit. + +The runtime path intentionally uses only: + +- `tensorcast` +- `tensorcast.artifact_runtime.host` +- `tensorcast.artifact_runtime.testing` + +It does not import `tensorcast.serving`, vLLM, source catalog helpers, retained +preload helpers, `ArtifactRuntimeSession`, or low-level bind/swap/restore +functions. + +Run: + +```bash +python examples/runtime_reference_framework/reference_framework.py +``` diff --git a/examples/serving_runtime_reference_framework/reference_framework.py b/examples/runtime_reference_framework/reference_framework.py similarity index 78% rename from examples/serving_runtime_reference_framework/reference_framework.py rename to examples/runtime_reference_framework/reference_framework.py index f0ae58c1..a9459c35 100644 --- a/examples/serving_runtime_reference_framework/reference_framework.py +++ b/examples/runtime_reference_framework/reference_framework.py @@ -1,5 +1,5 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Minimal Level 1 TensorCast serving runtime framework integration.""" +"""Minimal Level 1 TensorCast artifact-runtime framework integration.""" from __future__ import annotations @@ -8,9 +8,9 @@ import torch -import tensorcast.serving.hosts as tc_hosts -import tensorcast.serving.runtime as tc_runtime -import tensorcast.serving.testing as tc_testing +import tensorcast as tc +import tensorcast.artifact_runtime.host as tc_runtime_host +import tensorcast.artifact_runtime.testing as tc_testing class ReferenceRuntimeModel: @@ -25,9 +25,9 @@ def __init__(self) -> None: class ReferenceFrameworkHost: """Framework-owned model construction and semantic facts.""" - def identity(self, model_config: object) -> tc_hosts.FrameworkIdentity: + def identity(self, model_config: object) -> tc_runtime_host.FrameworkIdentity: del model_config - return tc_hosts.FrameworkIdentity( + return tc_runtime_host.FrameworkIdentity( framework_name="referencefw", framework_version="0", adapter_version="level1-example", @@ -83,9 +83,9 @@ class ReferencePlacementHost: def identity_facts( self, framework_config: object | None, - ) -> tc_hosts.PlacementIdentityFacts: + ) -> tc_runtime_host.PlacementIdentityFacts: del framework_config - return tc_hosts.PlacementIdentityFacts( + return tc_runtime_host.PlacementIdentityFacts( tensor_parallel_rank=0, tensor_parallel_size=1, pipeline_parallel_rank=0, @@ -97,16 +97,16 @@ def identity_facts( def admission_facts( self, framework_config: object | None, - ) -> tc_hosts.PlacementAdmissionFacts: + ) -> tc_runtime_host.PlacementAdmissionFacts: del framework_config - return tc_hosts.PlacementAdmissionFacts() + return tc_runtime_host.PlacementAdmissionFacts() def member_facts( self, framework_config: object | None, - ) -> tc_hosts.PlacementMemberFacts: + ) -> tc_runtime_host.PlacementMemberFacts: del framework_config - return tc_hosts.PlacementMemberFacts( + return tc_runtime_host.PlacementMemberFacts( runtime_rank=0, runtime_world_size=1, member_id="member-0", @@ 
-118,9 +118,9 @@ def member_facts( def execution_facts( self, framework_config: object | None, - ) -> tc_hosts.MaterializationExecutionFacts: + ) -> tc_runtime_host.MaterializationExecutionFacts: del framework_config - return tc_hosts.MaterializationExecutionFacts( + return tc_runtime_host.MaterializationExecutionFacts( collective_rank=0, collective_world_size=1, tensor_parallel_ranks=(0,), @@ -205,41 +205,21 @@ def validate_tensor_invariants( raise AssertionError("reference tensor invariants changed") -def build_reference_host() -> tc_hosts.IntegrationHost: - """Build the minimal host object a framework passes to TensorCast.""" +def build_reference_host() -> tc.RuntimeHostCapabilities: + """Build the minimal runtime host object a framework passes to TensorCast.""" - return tc_hosts.IntegrationHost( + return tc.RuntimeHostCapabilities( framework=ReferenceFrameworkHost(), placement=ReferencePlacementHost(), tensor_surface=ReferenceTensorSurface(), ) -def create_session() -> tc_runtime.ServingRuntimeSession: - """Create a Level 1 session shape for a durable serving artifact selector.""" - - return tc_runtime.ServingRuntimeSession.from_config( - { - "bootstrap": { - "mode": "disabled", - }, - "serving": { - "selector": { - "kind": "artifact_ref", - "value": "mi2:serving", - }, - }, - }, - host=build_reference_host(), - ) - - def run_level1_conformance() -> tc_testing.ConformanceResult: """Run the TensorCast Level 1 conformance kit against this host.""" - return tc_testing.assert_level1_runtime_conformance( - tc_runtime, - tc_hosts, + return tc_testing.assert_level1_artifact_runtime_conformance( + tc, host=build_reference_host(), ) diff --git a/examples/serving_binding_consumer/README.md b/examples/serving_binding_consumer/README.md deleted file mode 100644 index 12b27b58..00000000 --- a/examples/serving_binding_consumer/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# Serving Binding Reference Consumer - -This example is a minimal TensorCast-side consumer for the serving 
binding -prefetch/acquire flow. It is intentionally independent of vllm so the -daemon API can be exercised as a public reference path. - -```bash -source .venv/bin/activate -python examples/serving_binding_consumer/reference_consumer.py \ - --daemon-address 127.0.0.1:8073 \ - --source-artifact-id mi2: \ - --device-uuid -``` - -The parent process writes a resolved serving binding spec cache entry, calls -`PrefetchServingBinding`, and launches a worker subprocess that calls -`AcquireBindingValue` and releases the returned lease. diff --git a/examples/serving_runtime_reference_framework/README.md b/examples/serving_runtime_reference_framework/README.md deleted file mode 100644 index 054ed7ab..00000000 --- a/examples/serving_runtime_reference_framework/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Serving Runtime Reference Framework - -This example is the smallest TensorCast Level 1 framework integration shape. -It consumes an existing durable serving artifact through -`ServingRuntimeSession` and validates the integration with the conformance kit. - -The runtime path intentionally uses only: - -- `tensorcast.serving.runtime` -- `tensorcast.serving.hosts` -- `tensorcast.serving.testing` - -It does not import `tensorcast.serving.integration`, builder/admin modules, -vLLM, source catalog helpers, retained preload helpers, or low-level -bind/swap/restore functions. 
- -Run: - -```bash -python examples/serving_runtime_reference_framework/reference_framework.py -``` diff --git a/pyproject.toml b/pyproject.toml index c9d30661..c8764374 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,9 +60,9 @@ dev = [ "hypothesis>=6.135.26", "ipdb>=0.13.13", "ipynb>=0.5.1", - "mypy>=1.15.0", + "mypy==2.1.0", "pre-commit>=4.2.0", - "pyright>=1.1.403", + "pyright==1.1.409", "pytest>=8.3.5", "pytest-asyncio>=0.26.0", "pytest-xdist>=3.8.0", diff --git a/tensorcast/__init__.py b/tensorcast/__init__.py index 234b4ed4..01d29974 100644 --- a/tensorcast/__init__.py +++ b/tensorcast/__init__.py @@ -200,6 +200,9 @@ def _install_c_extension_bootstrap() -> None: "ArtifactDescriptor": ("tensorcast.api", "ArtifactDescriptor"), "ArtifactError": ("tensorcast.api", "ArtifactError"), "ArtifactFuture": ("tensorcast.api", "ArtifactFuture"), + "ArtifactRealizationHandle": ("tensorcast.api", "ArtifactRealizationHandle"), + "ArtifactRealizationReport": ("tensorcast.api", "ArtifactRealizationReport"), + "ArtifactRealizationSpec": ("tensorcast.api", "ArtifactRealizationSpec"), "AssemblyAttemptRef": ("tensorcast.api", "AssemblyAttemptRef"), "BindingReservationCapability": ( "tensorcast.api", @@ -219,44 +222,42 @@ def _install_c_extension_bootstrap() -> None: "PlanType": ("tensorcast.api", "PlanType"), "RegisterArtifactOptions": ("tensorcast.api", "RegisterArtifactOptions"), "RegisteredArtifact": ("tensorcast.api", "RegisteredArtifact"), - "RegisteredServingPublication": ( - "tensorcast.api", - "RegisteredServingPublication", - ), "PrefetchRetentionPolicy": ("tensorcast.api", "PrefetchRetentionPolicy"), - "PrefetchedServingBinding": ("tensorcast.api", "PrefetchedServingBinding"), - "PrefetchedServingBindingSet": ( + "PrefetchHandoff": ("tensorcast.api", "PrefetchHandoff"), + "PrefetchHandoffMemberFailure": ( "tensorcast.api", - "PrefetchedServingBindingSet", + "PrefetchHandoffMemberFailure", ), + "PrefetchHandoffSet": ("tensorcast.api", "PrefetchHandoffSet"), 
"RegisteredLease": ("tensorcast.api", "RegisteredLease"), "RegistrationResult": ("tensorcast.api", "RegistrationResult"), - "ServingBindingMemberRef": ("tensorcast.api", "ServingBindingMemberRef"), - "ServingBindingReadiness": ("tensorcast.api", "ServingBindingReadiness"), - "ServingBindingResolvedLayout": ( + "RuntimeBindingMemberRef": ("tensorcast.api", "RuntimeBindingMemberRef"), + "RuntimeBindingReadiness": ("tensorcast.api", "RuntimeBindingReadiness"), + "RuntimeBindingResolvedLayout": ( "tensorcast.api", - "ServingBindingResolvedLayout", + "RuntimeBindingResolvedLayout", ), - "ServingBindingResolvedSpecCacheEntry": ( + "RuntimeRealizationSpecCacheEntry": ( "tensorcast.api", - "ServingBindingResolvedSpecCacheEntry", + "RuntimeRealizationSpecCacheEntry", ), - "ServingBindingSetTarget": ("tensorcast.api", "ServingBindingSetTarget"), - "ServingBindingSourceKind": ("tensorcast.api", "ServingBindingSourceKind"), - "ServingBindingSourceMemberRef": ( + "RuntimeBindingSourceKind": ("tensorcast.api", "RuntimeBindingSourceKind"), + "RuntimeBindingSourceMemberRef": ( "tensorcast.api", - "ServingBindingSourceMemberRef", + "RuntimeBindingSourceMemberRef", ), - "ServingBindingSourceRef": ("tensorcast.api", "ServingBindingSourceRef"), - "ServingBindingSourceReuseDecision": ( + "RuntimeBindingSourceRef": ("tensorcast.api", "RuntimeBindingSourceRef"), + "RuntimeBindingSourceReuseDecision": ( "tensorcast.api", - "ServingBindingSourceReuseDecision", + "RuntimeBindingSourceReuseDecision", ), - "ServingBindingSourceReuseMode": ( + "RuntimeBindingSourceReuseMode": ( "tensorcast.api", - "ServingBindingSourceReuseMode", + "RuntimeBindingSourceReuseMode", ), - "ServingBindingTarget": ("tensorcast.api", "ServingBindingTarget"), + "RuntimeTopologyRef": ("tensorcast.api", "RuntimeTopologyRef"), + "RealizationTarget": ("tensorcast.api", "RealizationTarget"), + "RealizationTargetSet": ("tensorcast.api", "RealizationTargetSet"), "SealAssemblyResult": ("tensorcast.api", 
"SealAssemblyResult"), "SERVING_MANIFEST_TENSOR_NAME": ("tensorcast.api", "SERVING_MANIFEST_TENSOR_NAME"), "ViewRegistrationKind": ("tensorcast.api", "ViewRegistrationKind"), @@ -276,14 +277,6 @@ def _install_c_extension_bootstrap() -> None: "GroupVersionSetRef": ("tensorcast.api", "GroupVersionSetRef"), "GovernanceContext": ("tensorcast.api", "GovernanceContext"), "DirectorySnapshot": ("tensorcast.api", "DirectorySnapshot"), - "CapabilityDirectoryClient": ( - "tensorcast.capability_directory", - "CapabilityDirectoryClient", - ), - "CapabilityDirectoryOptions": ( - "tensorcast.capability_directory", - "CapabilityDirectoryOptions", - ), "Operation": ("tensorcast.api", "Operation"), "OperationError": ("tensorcast.api", "OperationError"), "OperationStatus": ("tensorcast.api", "OperationStatus"), @@ -295,10 +288,6 @@ def _install_c_extension_bootstrap() -> None: "PlanStepResult": ("tensorcast.api", "PlanStepResult"), "PartialSealResult": ("tensorcast.api", "PartialSealResult"), "PublicDiskSourceHandle": ("tensorcast.api", "PublicDiskSourceHandle"), - "PreparedServingRegistration": ( - "tensorcast.api", - "PreparedServingRegistration", - ), "PublishedModelVersion": ("tensorcast.api", "PublishedModelVersion"), "ExecutionDiagnostics": ("tensorcast.api", "ExecutionDiagnostics"), "BindingUpdateEpoch": ("tensorcast.api", "BindingUpdateEpoch"), @@ -321,23 +310,138 @@ def _install_c_extension_bootstrap() -> None: "tensorcast.api", "RepresentationPublishSpec", ), + "RuntimeArtifactBuildIntent": ("tensorcast.api", "RuntimeArtifactBuildIntent"), + "RuntimeArtifactManifest": ("tensorcast.api", "RuntimeArtifactManifest"), + "RuntimeArtifactPolicy": ("tensorcast.api", "RuntimeArtifactPolicy"), + "RuntimeArtifactPolicyInput": ("tensorcast.api", "RuntimeArtifactPolicyInput"), "SourceBoundCapability": ("tensorcast.api", "SourceBoundCapability"), - "ServingPublicationSubject": ("tensorcast.api", "ServingPublicationSubject"), - "ServingAdmissionFacts": ("tensorcast.api", 
"ServingAdmissionFacts"), - "ServingArtifactManifest": ("tensorcast.api", "ServingArtifactManifest"), - "SERVING_BUILD_DIGEST_VERSION": ( + "coerce_runtime_artifact_policy": ( "tensorcast.api", - "SERVING_BUILD_DIGEST_VERSION", + "coerce_runtime_artifact_policy", ), - "ServingBuildIntent": ("tensorcast.api", "ServingBuildIntent"), - "ServingRuntimePolicy": ("tensorcast.api", "ServingRuntimePolicy"), - "ServingSupportLevel": ("tensorcast.api", "ServingSupportLevel"), "Instance": ("tensorcast.api", "Instance"), "InstanceExecutionRoute": ("tensorcast.api", "InstanceExecutionRoute"), "Worker": ("tensorcast.api", "Worker"), "TargetSpec": ("tensorcast.api", "TargetSpec"), "TransformSpec": ("tensorcast.api", "TransformSpec"), "Runtime": ("tensorcast.api", "Runtime"), + "RuntimeAttachment": ( + "tensorcast.artifact_runtime.attachment", + "RuntimeAttachment", + ), + "RuntimeBindingState": ( + "tensorcast.artifact_runtime.attachment", + "RuntimeBindingState", + ), + "RuntimeAdmissionDecision": ( + "tensorcast.artifact_runtime.host", + "RuntimeAdmissionDecision", + ), + "RuntimeAdmissionPolicy": ( + "tensorcast.artifact_runtime.host", + "RuntimeAdmissionPolicy", + ), + "RuntimeAdmissionRequest": ( + "tensorcast.artifact_runtime.host", + "RuntimeAdmissionRequest", + ), + "RuntimeHostCapabilities": ( + "tensorcast.artifact_runtime.host", + "RuntimeHostCapabilities", + ), + "RuntimePlacement": ("tensorcast.artifact_runtime.host", "RuntimePlacement"), + "RuntimeProfile": ("tensorcast.artifact_runtime.host", "RuntimeProfile"), + "RuntimeTensorView": ("tensorcast.artifact_runtime.host", "RuntimeTensorView"), + "ArtifactLocator": ("tensorcast.artifact_runtime.locator", "ArtifactLocator"), + "RuntimeArtifactLocator": ( + "tensorcast.artifact_runtime.config", + "RuntimeArtifactLocator", + ), + "RuntimeStartPlanError": ( + "tensorcast.artifact_runtime.config", + "RuntimeStartPlanError", + ), + "RuntimePolicy": ("tensorcast.artifact_runtime.policy", "RuntimePolicy"), + 
"RuntimeRealizationReport": ( + "tensorcast.artifact_runtime.diagnostics", + "RuntimeRealizationReport", + ), + "TensorCastRuntimeConfig": ( + "tensorcast.artifact_runtime.config", + "TensorCastRuntimeConfig", + ), + "plan_runtime_start": ("tensorcast.artifact_runtime.config", "plan_runtime_start"), + "RuntimeRequestContext": ( + "tensorcast.artifact_runtime.intent", + "RuntimeRequestContext", + ), + "ModelAttributeNames": ("tensorcast.artifact_runtime.state", "ModelAttributeNames"), + "ModelAttributeRuntimeState": ( + "tensorcast.artifact_runtime.state", + "ModelAttributeRuntimeState", + ), + "OneShotRuntimeHook": ("tensorcast.artifact_runtime.state", "OneShotRuntimeHook"), + "BindingValueRefProjection": ( + "tensorcast.artifact_runtime.view", + "BindingValueRefProjection", + ), + "RuntimeEndpointProjection": ( + "tensorcast.artifact_runtime.view", + "RuntimeEndpointProjection", + ), + "RuntimeWorkerView": ("tensorcast.artifact_runtime.view", "RuntimeWorkerView"), + "SourceSelectionProjection": ( + "tensorcast.artifact_runtime.view", + "SourceSelectionProjection", + ), + "WeightVersionProjection": ( + "tensorcast.artifact_runtime.view", + "WeightVersionProjection", + ), + "aggregate_runtime_view_outputs": ( + "tensorcast.artifact_runtime.view", + "aggregate_runtime_view_outputs", + ), + "RuntimeReplicaPublicationSettings": ( + "tensorcast.artifact_runtime.publication.actions", + "RuntimeReplicaPublicationSettings", + ), + "RetainedRealizationClaim": ( + "tensorcast.retained_realization", + "RetainedRealizationClaim", + ), + "RetainedRealizationExpectedDigests": ( + "tensorcast.retained_realization", + "RetainedRealizationExpectedDigests", + ), + "project_runtime_replica_publication_state": ( + "tensorcast.artifact_runtime.publication.actions", + "project_runtime_replica_publication_state", + ), + "publish_runtime_replica": ( + "tensorcast.artifact_runtime.publication.actions", + "publish_runtime_replica", + ), + "reload_runtime_attachment": ( + 
"tensorcast.artifact_runtime.reload", + "reload_runtime_attachment", + ), + "merge_runtime_reload_extra_config": ( + "tensorcast.artifact_runtime.reload", + "merge_runtime_reload_extra_config", + ), + "normalize_runtime_reload_request_payload": ( + "tensorcast.artifact_runtime.reload", + "normalize_runtime_reload_request_payload", + ), + "retire_runtime_replica": ( + "tensorcast.artifact_runtime.publication.actions", + "retire_runtime_replica", + ), + "runtime_replica_publication_settings": ( + "tensorcast.artifact_runtime.publication.actions", + "runtime_replica_publication_settings", + ), "SignalSnapshot": ("tensorcast.api", "SignalSnapshot"), "TensorCastDirectory": ("tensorcast.api", "TensorCastDirectory"), "TensorCastSignals": ("tensorcast.api", "TensorCastSignals"), @@ -346,10 +450,29 @@ def _install_c_extension_bootstrap() -> None: "connect": ("tensorcast.api", "connect"), "context": ("tensorcast.api", "context"), "plan": ("tensorcast.api", "plan"), - "runtime": ("tensorcast.api", "runtime"), "RetentionHandle": ("tensorcast.retention", "RetentionHandle"), "acquire_retention_handle": ("tensorcast.retention", "acquire_retention_handle"), + "parse_retained_realization_claim": ( + "tensorcast.retained_realization", + "parse_retained_realization_claim", + ), "renew_retention_handle": ("tensorcast.retention", "renew_retention_handle"), + "retained_realization_claim_extra_from_handoff": ( + "tensorcast.retained_realization", + "retained_realization_claim_extra_from_handoff", + ), + "retained_realization_claim_extra_json_from_handoff": ( + "tensorcast.retained_realization", + "retained_realization_claim_extra_json_from_handoff", + ), + "retained_realization_claim_mode": ( + "tensorcast.retained_realization", + "retained_realization_claim_mode", + ), + "retained_realization_trusted_reservation_bytes": ( + "tensorcast.retained_realization", + "retained_realization_trusted_reservation_bytes", + ), "release_retention_handle": ("tensorcast.retention", 
"release_retention_handle"), "artifact": ("tensorcast.api.store", "artifact"), "artifact_async": ("tensorcast.api.store", "artifact_async"), @@ -422,18 +545,10 @@ def _install_c_extension_bootstrap() -> None: "tensorcast.api.store", "build_binding_finalize_publication_bundle", ), - "build_serving_publication_bundle": ( - "tensorcast.api.store", - "build_serving_publication_bundle", - ), "build_pure_transform_publication_bundle_from_registered_artifact": ( "tensorcast.api.store", "build_pure_transform_publication_bundle_from_registered_artifact", ), - "build_serving_publication_bundle_from_registered_artifact": ( - "tensorcast.api.store", - "build_serving_publication_bundle_from_registered_artifact", - ), "build_pure_transform_publication_spec": ( "tensorcast.api.store", "build_pure_transform_publication_spec", @@ -478,42 +593,10 @@ def _install_c_extension_bootstrap() -> None: "tensorcast.api.store", "complete_structural_representation_publish_attempt", ), - "build_serving_manifest_ref": ( - "tensorcast.api.store", - "build_serving_manifest_ref", - ), - "coerce_serving_runtime_policy": ( - "tensorcast.api.store", - "coerce_serving_runtime_policy", - ), "compute_pure_transform_representation_contract_hash": ( "tensorcast.api.store", "compute_pure_transform_representation_contract_hash", ), - "compute_serving_tensor_schema_hash": ( - "tensorcast.api.store", - "compute_serving_tensor_schema_hash", - ), - "count_canonical_serving_tensors": ( - "tensorcast.api.store", - "count_canonical_serving_tensors", - ), - "prepare_pure_transform_serving_registration": ( - "tensorcast.api.store", - "prepare_pure_transform_serving_registration", - ), - "prepare_binding_finalize_serving_registration": ( - "tensorcast.api.store", - "prepare_binding_finalize_serving_registration", - ), - "prepare_serving_registration": ( - "tensorcast.api.store", - "prepare_serving_registration", - ), - "parse_serving_manifest_ref": ( - "tensorcast.api.store", - "parse_serving_manifest_ref", - ), 
"init": ("tensorcast.startup", "init"), "PortConfig": ("tensorcast.startup", "PortConfig"), "is_initialized": ("tensorcast.startup", "is_initialized"), @@ -522,11 +605,19 @@ def _install_c_extension_bootstrap() -> None: context: Any plan: Any +runtime: Any +RuntimeAdmissionDecision: Any +RuntimeAdmissionPolicy: Any +RuntimeAdmissionRequest: Any +RuntimePlacement: Any +RuntimeProfile: Any +RuntimeTensorView: Any +coerce_runtime_artifact_policy: Any def __getattr__(name: str) -> Any: - if name == "serving": - module = importlib.import_module("tensorcast.serving") + if name == "runtime": + module = importlib.import_module("tensorcast.runtime") globals()[name] = module return module if name not in _LAZY_ATTRS: @@ -539,16 +630,18 @@ def __getattr__(name: str) -> Any: def __dir__() -> list[str]: - return sorted(set(globals()).union(_LAZY_ATTRS).union({"serving"})) + return sorted(set(globals()).union(_LAZY_ATTRS).union({"runtime"})) if TYPE_CHECKING: - import tensorcast.serving as serving # noqa: F401 from tensorcast.api import ( # noqa: F401 Artifact, ArtifactDescriptor, ArtifactError, ArtifactFuture, + ArtifactRealizationHandle, + ArtifactRealizationReport, + ArtifactRealizationSpec, BindingRealizationEntry, BindingRealizationPlan, BindingReservationCapability, @@ -558,8 +651,6 @@ def __dir__() -> list[str]: CallContext, CanonicalIndex, CanonicalIndexEntry, - CapabilityDirectoryClient, - CapabilityDirectoryOptions, CollectiveLoadGroup, DirectorySnapshot, ExecutionDiagnostics, @@ -584,30 +675,33 @@ def __dir__() -> list[str]: PlanStepRef, PlanStepResult, PlanType, - PrefetchedServingBinding, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffMemberFailure, + PrefetchHandoffSet, PrefetchRetentionPolicy, - PreparedServingRegistration, PublicDiskSourceHandle, + RealizationTarget, + RealizationTargetSet, RegisterArtifactOptions, RegisteredArtifact, RegisteredLease, - RegisteredServingPublication, RegistrationResult, RetentionHandle, Runtime, - 
ServingBindingMemberRef, - ServingBindingReadiness, - ServingBindingResolvedLayout, - ServingBindingResolvedSpecCacheEntry, - ServingBindingSetTarget, - ServingBindingSourceKind, - ServingBindingSourceMemberRef, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingSourceReuseMode, - ServingBindingTarget, - ServingPublicationSubject, + RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, + RuntimeArtifactPolicy, + RuntimeArtifactPolicyInput, + RuntimeBindingMemberRef, + RuntimeBindingReadiness, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceKind, + RuntimeBindingSourceMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeBindingSourceReuseMode, + RuntimeRealizationSpecCacheEntry, + RuntimeTopologyRef, SignalSnapshot, SourceBoundCapability, Store, @@ -630,8 +724,6 @@ def __dir__() -> list[str]: artifact, artifact_async, binding_realization_plan_to_proto, - build_serving_publication_bundle, - build_serving_publication_bundle_from_registered_artifact, complete_pure_transform_publication, deregister_artifact, from_disk, @@ -639,7 +731,6 @@ def __dir__() -> list[str]: normalize_binding_realization_plan, persist_artifact, persistence_operation, - prepare_serving_registration, promote_mounted_source, put, put_async, @@ -654,6 +745,57 @@ def __dir__() -> list[str]: store, unregister_vram_region, ) + from tensorcast.artifact_runtime.attachment import ( # noqa: F401 + RuntimeAttachment, + RuntimeBindingState, + ) + from tensorcast.artifact_runtime.config import ( # noqa: F401 + RuntimeArtifactLocator, + RuntimeStartPlanError, + TensorCastRuntimeConfig, + plan_runtime_start, + ) + from tensorcast.artifact_runtime.diagnostics import ( + RuntimeRealizationReport, # noqa: F401 + ) + from tensorcast.artifact_runtime.host import RuntimeHostCapabilities # noqa: F401 + from tensorcast.artifact_runtime.intent import RuntimeRequestContext # noqa: F401 + from tensorcast.artifact_runtime.locator import ArtifactLocator # 
noqa: F401 + from tensorcast.artifact_runtime.policy import RuntimePolicy # noqa: F401 + from tensorcast.artifact_runtime.publication.actions import ( # noqa: F401 + RuntimeReplicaPublicationSettings, + project_runtime_replica_publication_state, + publish_runtime_replica, + retire_runtime_replica, + runtime_replica_publication_settings, + ) + from tensorcast.artifact_runtime.reload import ( # noqa: F401 + merge_runtime_reload_extra_config, + normalize_runtime_reload_request_payload, + reload_runtime_attachment, + ) + from tensorcast.artifact_runtime.state import ( # noqa: F401 + ModelAttributeNames, + ModelAttributeRuntimeState, + OneShotRuntimeHook, + ) + from tensorcast.artifact_runtime.view import ( # noqa: F401 + BindingValueRefProjection, + RuntimeEndpointProjection, + RuntimeWorkerView, + SourceSelectionProjection, + WeightVersionProjection, + aggregate_runtime_view_outputs, + ) + from tensorcast.retained_realization import ( # noqa: F401 + RetainedRealizationClaim, + RetainedRealizationExpectedDigests, + parse_retained_realization_claim, + retained_realization_claim_extra_from_handoff, + retained_realization_claim_extra_json_from_handoff, + retained_realization_claim_mode, + retained_realization_trusted_reservation_bytes, + ) from tensorcast.startup import ( # noqa: F401 PortConfig, init, @@ -664,14 +806,13 @@ def __dir__() -> list[str]: __all__ = [ "__version__", - "serving", + "runtime", "init", "is_initialized", "shutdown", "Store", "StoreOptions", "RegisteredArtifact", - "RegisteredServingPublication", "ArtifactError", "ArtifactFuture", "BindingReservationCapability", @@ -689,8 +830,9 @@ def __dir__() -> list[str]: "RegisterArtifactOptions", "GetArtifactOptions", "PrefetchRetentionPolicy", - "PrefetchedServingBinding", - "PrefetchedServingBindingSet", + "PrefetchHandoff", + "PrefetchHandoffMemberFailure", + "PrefetchHandoffSet", "calculate_tensor_device_offsets", "build_indices_from_safetensors", "binding_realization_plan_to_proto", @@ -698,6 +840,8 @@ 
def __dir__() -> list[str]: "CollectiveLoadGroup", "GroupRealization", "GroupVersionSetRef", + "RealizationTarget", + "RealizationTargetSet", "ExecutionDiagnostics", "BindingUpdateEpoch", "HashBackend", @@ -726,20 +870,21 @@ def __dir__() -> list[str]: "resolve_public_disk_source", "Artifact", "ArtifactDescriptor", + "ArtifactRealizationHandle", + "ArtifactRealizationReport", + "ArtifactRealizationSpec", "PublicDiskSourceHandle", "SourceBoundCapability", - "ServingBindingMemberRef", - "ServingBindingReadiness", - "ServingBindingResolvedLayout", - "ServingBindingResolvedSpecCacheEntry", - "ServingBindingSetTarget", - "ServingBindingSourceKind", - "ServingBindingSourceMemberRef", - "ServingBindingSourceRef", - "ServingBindingSourceReuseDecision", - "ServingBindingSourceReuseMode", - "ServingBindingTarget", - "ServingPublicationSubject", + "RuntimeBindingMemberRef", + "RuntimeBindingReadiness", + "RuntimeBindingResolvedLayout", + "RuntimeRealizationSpecCacheEntry", + "RuntimeBindingSourceKind", + "RuntimeBindingSourceMemberRef", + "RuntimeBindingSourceRef", + "RuntimeBindingSourceReuseDecision", + "RuntimeBindingSourceReuseMode", + "RuntimeTopologyRef", "store", "register", "register_async", @@ -755,4 +900,50 @@ def __dir__() -> list[str]: "unregister_vram_region", "deregister_artifact", "BuildConfigMismatchError", + "RuntimeAttachment", + "RuntimeAdmissionDecision", + "RuntimeAdmissionPolicy", + "RuntimeAdmissionRequest", + "RuntimeArtifactBuildIntent", + "RuntimeArtifactManifest", + "RuntimeArtifactPolicy", + "RuntimeArtifactPolicyInput", + "RuntimeBindingState", + "RuntimeHostCapabilities", + "RuntimePlacement", + "RuntimeProfile", + "RuntimeRequestContext", + "ArtifactLocator", + "RuntimeArtifactLocator", + "RuntimePolicy", + "RuntimeRealizationReport", + "RuntimeStartPlanError", + "RuntimeTensorView", + "TensorCastRuntimeConfig", + "ModelAttributeNames", + "ModelAttributeRuntimeState", + "OneShotRuntimeHook", + "BindingValueRefProjection", + 
"RuntimeEndpointProjection", + "RuntimeWorkerView", + "SourceSelectionProjection", + "WeightVersionProjection", + "aggregate_runtime_view_outputs", + "RuntimeReplicaPublicationSettings", + "coerce_runtime_artifact_policy", + "RetainedRealizationClaim", + "RetainedRealizationExpectedDigests", + "parse_retained_realization_claim", + "project_runtime_replica_publication_state", + "publish_runtime_replica", + "reload_runtime_attachment", + "merge_runtime_reload_extra_config", + "normalize_runtime_reload_request_payload", + "plan_runtime_start", + "retained_realization_claim_extra_from_handoff", + "retained_realization_claim_extra_json_from_handoff", + "retained_realization_claim_mode", + "retained_realization_trusted_reservation_bytes", + "retire_runtime_replica", + "runtime_replica_publication_settings", ] diff --git a/tensorcast/api/__init__.py b/tensorcast/api/__init__.py index 852fe0f6..052f585f 100644 --- a/tensorcast/api/__init__.py +++ b/tensorcast/api/__init__.py @@ -69,6 +69,9 @@ Artifact, ArtifactError, ArtifactFuture, + ArtifactRealizationHandle, + ArtifactRealizationReport, + ArtifactRealizationSpec, AssemblyAttemptRef, AssemblyCloseoutContract, AssemblyReadinessPolicy, @@ -91,33 +94,36 @@ HashLocation, IdentityMintStrategy, PartialSealResult, - PrefetchedServingBinding, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffMemberFailure, + PrefetchHandoffSet, PrefetchRetentionPolicy, - PreparedServingRegistration, + PreparedRuntimeArtifactRegistration, PublicDiskSourceHandle, PublishedModelVersion, + RealizationTarget, + RealizationTargetSet, RegisteredArtifact, - RegisteredServingPublication, + RegisteredRuntimeArtifactPublication, RepresentationPublishContract, RepresentationPublishSpec, - ServingAdmissionFacts, - ServingArtifactManifest, - ServingBindingMemberRef, - ServingBindingReadiness, - ServingBindingResolvedLayout, - ServingBindingResolvedSpecCacheEntry, - ServingBindingSetTarget, - ServingBindingSourceKind, - 
ServingBindingSourceMemberRef, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingSourceReuseMode, - ServingBindingTarget, - ServingBuildIntent, - ServingPublicationSubject, - ServingRuntimePolicy, - ServingSupportLevel, + RuntimeAdmissionFacts, + RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, + RuntimeArtifactPolicy, + RuntimeArtifactPolicyInput, + RuntimeBindingMemberRef, + RuntimeBindingReadiness, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceKind, + RuntimeBindingSourceMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeBindingSourceReuseMode, + RuntimePublicationSubject, + RuntimeRealizationSpecCacheEntry, + RuntimeSupportLevel, + RuntimeTopologyRef, SourceBoundCapability, Store, StoreOptions, @@ -129,10 +135,10 @@ build_pure_transform_publication_spec, build_pure_transform_transform_spec, build_representation_publish_requirements, + build_runtime_artifact_publication_bundle, + build_runtime_artifact_publication_bundle_from_registered_artifact, build_serving_manifest_ref, - build_serving_publication_bundle, - build_serving_publication_bundle_from_registered_artifact, - coerce_serving_runtime_policy, + coerce_runtime_artifact_policy, complete_binding_finalize_publication_from_binding, complete_canonical_representation_publish_attempt, complete_plan_repo_owned_representation_publish_attempt, @@ -142,17 +148,17 @@ complete_representation_publish_attempt, complete_structural_representation_publish_attempt, compute_pure_transform_representation_contract_hash, - compute_serving_tensor_schema_hash, - count_canonical_serving_tensors, + compute_runtime_artifact_tensor_schema_hash, + count_canonical_runtime_tensors, from_disk, import_from_disk, list_artifact_layouts, normalize_binding_realization_plan, parse_serving_manifest_ref, persist_artifact, - prepare_binding_finalize_serving_registration, - prepare_pure_transform_serving_registration, - prepare_serving_registration, + 
prepare_binding_finalize_runtime_registration, + prepare_pure_transform_runtime_registration, + prepare_runtime_artifact_registration, promote_mounted_source, realize_into_binding, register_pure_transform_publication, @@ -218,6 +224,9 @@ "BindingRealizationEntry", "BindingRealizationPlan", "Artifact", + "ArtifactRealizationHandle", + "ArtifactRealizationReport", + "ArtifactRealizationSpec", "CallContext", "CollectiveLoadGroup", "GroupRealization", @@ -234,38 +243,42 @@ "QosClass", "PartialSealResult", "PrefetchRetentionPolicy", - "PrefetchedServingBinding", - "PrefetchedServingBindingSet", + "PrefetchHandoff", + "PrefetchHandoffMemberFailure", + "PrefetchHandoffSet", "PublicDiskSourceHandle", - "PreparedServingRegistration", + "PreparedRuntimeArtifactRegistration", "PublishedModelVersion", + "RealizationTarget", + "RealizationTargetSet", "BindingUpdateEpoch", - "ServingBindingMemberRef", - "ServingBindingReadiness", - "ServingBindingResolvedLayout", - "ServingBindingResolvedSpecCacheEntry", - "ServingBindingSetTarget", - "ServingBindingSourceKind", - "ServingBindingSourceMemberRef", - "ServingBindingSourceRef", - "ServingBindingSourceReuseDecision", - "ServingBindingSourceReuseMode", - "ServingBindingTarget", + "RuntimeBindingMemberRef", + "RuntimeBindingReadiness", + "RuntimeBindingResolvedLayout", + "RuntimeRealizationSpecCacheEntry", + "RuntimeBindingSourceKind", + "RuntimeBindingSourceMemberRef", + "RuntimeBindingSourceRef", + "RuntimeBindingSourceReuseDecision", + "RuntimeBindingSourceReuseMode", + "RuntimeTopologyRef", "ExecutionDiagnostics", "HashBackend", "HashLocation", "IdentityMintStrategy", "RepresentationPublishSpec", - "RegisteredServingPublication", + "RegisteredRuntimeArtifactPublication", "RepresentationPublishContract", + "RuntimeArtifactBuildIntent", + "RuntimeArtifactManifest", + "RuntimeArtifactPolicy", + "RuntimeArtifactPolicyInput", "SourceBoundCapability", - "ServingPublicationSubject", - "ServingAdmissionFacts", - "ServingArtifactManifest", 
+ "RuntimePublicationSubject", + "RuntimeAdmissionFacts", "SERVING_BUILD_DIGEST_VERSION", - "ServingBuildIntent", - "ServingRuntimePolicy", - "ServingSupportLevel", + "RuntimeSupportLevel", + "coerce_runtime_artifact_policy", "SpanAttributeValue", "TimeoutErrorDetails", "context", @@ -291,23 +304,22 @@ "build_binding_finalize_admission_facts", "binding_realization_plan_to_proto", "build_binding_finalize_publication_bundle", - "build_serving_publication_bundle", - "build_serving_publication_bundle_from_registered_artifact", + "build_runtime_artifact_publication_bundle", + "build_runtime_artifact_publication_bundle_from_registered_artifact", "build_pure_transform_publication_bundle", "build_pure_transform_publication_bundle_from_registered_artifact", "build_pure_transform_publication_spec", "build_representation_publish_requirements", "build_pure_transform_transform_spec", "build_serving_manifest_ref", - "coerce_serving_runtime_policy", "complete_binding_finalize_publication_from_binding", "complete_pure_transform_publication", "complete_pure_transform_publication_from_binding", "complete_canonical_representation_publish_attempt", "complete_plan_repo_owned_representation_publish_attempt", "compute_pure_transform_representation_contract_hash", - "compute_serving_tensor_schema_hash", - "count_canonical_serving_tensors", + "compute_runtime_artifact_tensor_schema_hash", + "count_canonical_runtime_tensors", "from_disk", "import_from_disk", "promote_mounted_source", @@ -315,9 +327,9 @@ "list_artifact_layouts", "parse_serving_manifest_ref", "realize_into_binding", - "prepare_binding_finalize_serving_registration", - "prepare_serving_registration", - "prepare_pure_transform_serving_registration", + "prepare_binding_finalize_runtime_registration", + "prepare_runtime_artifact_registration", + "prepare_pure_transform_runtime_registration", "persist_artifact", "register_pure_transform_publication", "resolve_public_disk_source", diff --git a/tensorcast/api/_config.py 
b/tensorcast/api/_config.py index 08098479..b13e4cb1 100644 --- a/tensorcast/api/_config.py +++ b/tensorcast/api/_config.py @@ -2,7 +2,6 @@ from __future__ import annotations -import os import threading from collections.abc import Mapping from enum import Enum @@ -26,7 +25,6 @@ # Global daemon address configuration _daemon_address_lock = threading.RLock() _global_daemon_address: str | None = None -_global_store_address = os.environ.get("TENSORCAST_GLOBAL_STORE", "127.0.0.1:8085") def set_daemon_address(address: str) -> None: @@ -56,15 +54,6 @@ def has_daemon_address() -> bool: return _global_daemon_address is not None -def set_global_store_address(address: str) -> None: - global _global_store_address - _global_store_address = address - - -def get_global_store_address() -> str: - return _global_store_address - - class PlanType(Enum): DRAM_STABLE = "dram_stable" VRAM_COALESCED = "vram_coalesced" @@ -729,9 +718,12 @@ class CollectivePolicyMode(str, Enum): def parse(value: object) -> "CollectivePolicyMode": if isinstance(value, CollectivePolicyMode): return value - normalized = ( - "require_collective" if value is None else str(value).strip().lower() - ) + if value is None or value == "": + raise ValueError( + "collective_policy must be explicit when parsed directly; " + "leave it unset for request normalization to choose the default." 
+ ) + normalized = str(value).strip().lower() if normalized == "require_collective": return CollectivePolicyMode.REQUIRE_COLLECTIVE if normalized == "collective_first": @@ -893,8 +885,6 @@ def _normalize_wait_for_shared_disk_ms(cls, value: object) -> int: "policy_requires_persistence", "clear_daemon_address", "get_daemon_address", - "get_global_store_address", "has_daemon_address", "set_daemon_address", - "set_global_store_address", ] diff --git a/tensorcast/api/plan/__init__.py b/tensorcast/api/plan/__init__.py index e6203b04..180be953 100644 --- a/tensorcast/api/plan/__init__.py +++ b/tensorcast/api/plan/__init__.py @@ -23,7 +23,7 @@ ) from tensorcast.api.plan.targets import TargetSpec from tensorcast.api.plan.transforms import TransformSpec -from tensorcast.api.store.serving_builder import build_pure_transform_transform_spec +from tensorcast.api.store.publication_builder import build_pure_transform_transform_spec __all__ = [ "ARTIFACT_SET_CARRIER_INLINE", diff --git a/tensorcast/api/plan/plan.py b/tensorcast/api/plan/plan.py index 372261a7..46699fba 100644 --- a/tensorcast/api/plan/plan.py +++ b/tensorcast/api/plan/plan.py @@ -39,7 +39,7 @@ PrefetchedReplica, _decode_capability_token, ) -from tensorcast.api.store.serving_builder import build_pure_transform_transform_spec +from tensorcast.api.store.publication_builder import build_pure_transform_transform_spec from tensorcast.api.store.view_composer import compute_view_id from tensorcast.engine_adapter.artifact_api import ( BatchOutcome, @@ -59,20 +59,20 @@ AssemblyContractFamily, AssemblyReadinessPolicy, AssemblyRequirementSetRef, - PrefetchedServingBinding, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffSet, PrefetchRetentionPolicy, + RealizationTarget, + RealizationTargetSet, RepresentationPublishContract, RepresentationPublishSpec, - ServingArtifactManifest, - ServingBindingReadiness, - ServingBindingSetTarget, - ServingBindingTarget, + RuntimeArtifactManifest, + RuntimeBindingReadiness, ) 
if TYPE_CHECKING: from tensorcast.api.store import Store - from tensorcast.types import ServingBuildIntent + from tensorcast.types import RuntimeArtifactBuildIntent T = TypeVar("T") ArtifactActionResult = ( @@ -223,8 +223,8 @@ class _PrefetchAction: artifact: Artifact device: str | int | None device_id: int - target: ServingBindingTarget | ServingBindingSetTarget | None = None - readiness: ServingBindingReadiness = "serving_local_ready" + target: RealizationTarget | RealizationTargetSet | None = None + readiness: RuntimeBindingReadiness = "runtime_local_ready" retention: PrefetchRetentionPolicy | None = None @@ -426,7 +426,7 @@ def _artifact_result_from_proto( serving_manifest_ref=str( result.pure_transform_publication.serving_manifest_ref ), - serving_manifest=ServingArtifactManifest.from_bytes( + serving_manifest=RuntimeArtifactManifest.from_bytes( bytes(result.pure_transform_publication.serving_manifest_bytes) ), serving_manifest_bytes=bytes( @@ -761,13 +761,11 @@ def prefetch( art: Artifact, *, device: str | int | None = None, - target: ServingBindingTarget | ServingBindingSetTarget | None = None, - readiness: ServingBindingReadiness = "serving_local_ready", + target: RealizationTarget | RealizationTargetSet | None = None, + readiness: RuntimeBindingReadiness = "runtime_local_ready", retention: PrefetchRetentionPolicy | None = None, depends_on: Sequence[PlanStepRef[Any]] | None = None, - ) -> PlanStepRef[ - PrefetchedReplica | PrefetchedServingBinding | PrefetchedServingBindingSet - ]: + ) -> PlanStepRef[PrefetchedReplica | PrefetchHandoff | PrefetchHandoffSet]: if target is not None and device is not None: raise ArtifactError( "prefetch target and device are mutually exclusive", @@ -1012,7 +1010,7 @@ def transform_register_pure_transform( self, art: Artifact, *, - build_intent: "ServingBuildIntent", + build_intent: "RuntimeArtifactBuildIntent", contract_family: str | None = None, out_key: str, transform_name: str = "identity.v1", @@ -1329,7 +1327,7 @@ def 
to_spec(self) -> plan_pb2.PlanSpec: _fill_selection_proto(selection, prefetch_action.selection) prefetch_action.device_id = int(step.action.device_id) if step.action.target is not None: - if isinstance(step.action.target, ServingBindingTarget): + if isinstance(step.action.target, RealizationTarget): prefetch_action.serving_binding_target.CopyFrom( step.action.target.to_proto() ) diff --git a/tensorcast/api/store/README.md b/tensorcast/api/store/README.md index f40ca8d9..bdea7c53 100644 --- a/tensorcast/api/store/README.md +++ b/tensorcast/api/store/README.md @@ -45,8 +45,12 @@ managing clients manually. metadata-first mounted-source path for same-daemon loading. Successful calls return a lazy `Artifact` seeded from `ResolvePublicDiskSource` metadata, usually with primary `artifact_id = msa1:...`, without hashing payload bytes - during metadata resolution. Use `show_progress=True` or call - `import_from_disk(...)` explicitly when you need streamed daemon import. + during metadata resolution. The returned mounted-source artifact keeps the + daemon-attested source handle for direct + `Artifact.realize(ArtifactRealizationSpec.model_runtime(...), + runtime_host=...)` startup through framework runtime host capabilities. Use + `show_progress=True` or call `import_from_disk(...)` explicitly when you need + streamed daemon import. - `tensorcast.import_from_disk(path)` / `Store.import_from_disk(path)` keep the explicit daemon import contract via `ImportArtifactFromPath` / `ImportArtifactFromPathStream`. This path returns `mi2:` and remains the @@ -162,15 +166,15 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only `serving_manifest_ref`. - Phase 1 currently supports the reserved manifest-tensor carrier `tensor:__tensorcast_meta__.manifest_json`. 
- - `ServingArtifactManifest` now self-describes its phase-1 carrier through + - `RuntimeArtifactManifest` now self-describes its phase-1 carrier through `serving_manifest_ref`, and the typed serving-lineage models can derive a strict runtime gate: `RepresentationPublishContract.to_runtime_policy()`, - `ServingArtifactManifest.to_runtime_policy()`, and - `PublishedModelVersion.require_serving_runtime_policy()`. + `RuntimeArtifactManifest.to_runtime_policy()`, and + `PublishedModelVersion.require_runtime_artifact_policy()`. - The repo-owned serving-lineage carriers now also expose explicit phase-1 build identity fields: - `ServingArtifactManifest.serving_build_digest_version` and + `RuntimeArtifactManifest.serving_build_digest_version` and `RepresentationPublishContract.serving_build_digest_version`. Runtime policy gates on `serving_manifest_ref`, `representation_contract_hash`, and `serving_build_digest`. @@ -185,12 +189,12 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only spec directly and forward it into `start_assembly_attempt(...)` through the typed `representation_publish_spec` daemon ingress instead of re-authoring the generic closeout shell at each call site. - When the spec carries optional `ServingAdmissionFacts`, TensorCast validates + When the spec carries optional `RuntimeAdmissionFacts`, TensorCast validates the supplied finalize classification, same-binding proof, and support level for consistency without inferring missing integration-private rollout state. - `BINDING_FINALIZE` publication is same-binding-only. Use `Store.complete_binding_finalize_publication_from_binding(...)` after the - serving binding current value has been realized, finalized, and sealed. + runtime binding current value has been realized, finalized, and sealed. The resulting spec must carry a binding-value publication subject and `same_binding_fast_path_validated=True`. - Tensor-entry `BINDING_FINALIZE` publication helpers have been removed. 
@@ -257,8 +261,8 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only `transform_register` path now also prepares the reserved manifest tensor before registration, so the resulting serving artifact can already carry `tensor:__tensorcast_meta__.manifest_json`. - - For steady-state serving bind or swap, pass - `serving_runtime_policy=...` to `artifact.bind(...)`, + - For steady-state runtime bind or swap, pass + `runtime_artifact_policy=...` to `artifact.bind(...)`, `artifact.bind_into(...)`, or `binding.swap(...)`. This keeps generic artifact load permissive while giving serving runtime an explicit strict gate. When the policy is present, the daemon requires a @@ -267,12 +271,12 @@ Design and execution details: `../../../docs/designs/0077-unified-reference-only artifact is accepted into the serving path. If you pass a full `RepresentationPublishSpec` instead of a plain runtime policy, TensorCast also requires - `ServingSupportLevel.RUNTIME_BIND_SWAP_READY` when caller-supplied + `RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY` when caller-supplied admission facts are present. - The same runtime-ready gate now also applies to serving-key activation on typed `representation_publish` specs: a spec carrying `serving_version_key` must be admitted at - `ServingSupportLevel.RUNTIME_BIND_SWAP_READY`. + `RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY`. - `Store.seal_assembly(assembly_id, publish_canonical=True)` seals an assembly into a stable MI2 identity and returns the bound descriptor. @@ -399,19 +403,19 @@ binding.swap("model:v2") `lease_mode=NO_LEASE` so it does not create PID-bound UseLeases and does not mint IPC handle leases. Prefetch is supported for both GPU VRAM (`"cuda:0"`/`0`) and daemon-owned host DRAM (`"cpu"`/`"dram"`/`-1`). Handle-exporting APIs remain PID/lease-bound and are separate from daemon-owned warm replicas. 
-- Retained serving prefetch lowers through the unified realization facade: +- Retained realization prefetch lowers through the unified realization facade: `ArtifactRealizationSpec.retained_binding(...)` for one retained binding and `ArtifactRealizationSpec.target_set(...)` for TP/group target sets. - `artifact.prefetch(target=ServingBindingTarget(...))` and - `artifact.prefetch(target=ServingBindingSetTarget(...))` remain ergonomic + `artifact.prefetch(target=RealizationTarget(...))` and + `artifact.prefetch(target=RealizationTargetSet(...))` remain ergonomic wrappers, but target sets must use the target-set realization path so group admission, strategy, lifecycle, resource-envelope, and report state all carry `target_kind="target_set"`. Ordinary `device=` prefetch behavior is - unchanged. Serving targets require runtime-provided resolved layout/index + unchanged. Runtime targets require runtime-provided resolved layout/index metadata before daemon allocation; unresolved layouts fail closed before GPU memory is reserved. The daemon keeps serving prefetch behind `daemon_config.serving_prefetch.enabled` and returns a typed - `PrefetchedServingBinding` / `PrefetchedServingBindingSet` result once the + `PrefetchHandoff` / `PrefetchHandoffSet` result once the retained binding materialization path is enabled. - Prefetch idempotency derives a stable action fingerprint from selection identity (`artifact_id`, `logical_layout_hash`, `selection_hash`) and target placement (daemon + device/tier). 
`selection_hash` is computed via diff --git a/tensorcast/api/store/__init__.py b/tensorcast/api/store/__init__.py index db7d3f48..f32013f7 100644 --- a/tensorcast/api/store/__init__.py +++ b/tensorcast/api/store/__init__.py @@ -86,6 +86,24 @@ OwnedBindingSlot, restore_owned_binding_tensors, ) +from tensorcast.api.store.publication_builder import ( + PreparedRuntimeArtifactRegistration, + RegisteredRuntimeArtifactPublication, + build_binding_finalize_admission_facts, + build_binding_finalize_publication_bundle, + build_pure_transform_publication_bundle, + build_pure_transform_publication_bundle_from_registered_artifact, + build_pure_transform_publication_spec, + build_pure_transform_transform_spec, + build_runtime_artifact_publication_bundle, + build_runtime_artifact_publication_bundle_from_registered_artifact, + compute_pure_transform_representation_contract_hash, + compute_runtime_artifact_tensor_schema_hash, + count_canonical_runtime_tensors, + prepare_binding_finalize_runtime_registration, + prepare_pure_transform_runtime_registration, + prepare_runtime_artifact_registration, +) from tensorcast.api.store.realization_kernel import ( ArtifactRealizationHandle, ArtifactRealizationReport, @@ -156,11 +174,11 @@ from tensorcast.api.store.runtime import ( get_context as get_runtime_context, ) -from tensorcast.api.store.serving_binding_reference_consumer import ( +from tensorcast.api.store.runtime_realization_reference_consumer import ( REFERENCE_RUNTIME, - ReferenceServingAcquireResult, - ReferenceServingResolvedSpec, - ReferenceServingTensorSpec, + ReferenceRuntimeAcquireResult, + ReferenceRuntimeResolvedSpec, + ReferenceRuntimeTensorSpec, acquire_reference_binding, build_reference_resolved_spec, build_reference_target_layout, @@ -169,39 +187,21 @@ prefetch_reference_binding_set, release_reference_acquire, target_from_reference_cache_record, - unpack_prefetched_serving_binding, - unpack_prefetched_serving_binding_set, + unpack_prefetch_handoff, + 
unpack_prefetch_handoff_set, write_reference_resolved_spec_cache_entry, ) -from tensorcast.api.store.serving_binding_spec_cache import ( - ServingBindingSpecCacheGroupIndex, - ServingBindingSpecCacheRecord, +from tensorcast.api.store.runtime_realization_spec_cache import ( + RuntimeRealizationSpecCacheGroupIndex, + RuntimeRealizationSpecCacheRecord, canonical_json_bytes, read_matching_resolved_spec_cache_entry, read_resolved_spec_cache_entry, read_resolved_spec_cache_group_index, - serving_binding_spec_cache_root, + runtime_realization_spec_cache_root, write_resolved_spec_cache_entry, write_resolved_spec_cache_group_index, ) -from tensorcast.api.store.serving_builder import ( - PreparedServingRegistration, - RegisteredServingPublication, - build_binding_finalize_admission_facts, - build_binding_finalize_publication_bundle, - build_pure_transform_publication_bundle, - build_pure_transform_publication_bundle_from_registered_artifact, - build_pure_transform_publication_spec, - build_pure_transform_transform_spec, - build_serving_publication_bundle, - build_serving_publication_bundle_from_registered_artifact, - compute_pure_transform_representation_contract_hash, - compute_serving_tensor_schema_hash, - count_canonical_serving_tensors, - prepare_binding_finalize_serving_registration, - prepare_pure_transform_serving_registration, - prepare_serving_registration, -) from tensorcast.api.store.target_region_lifecycle import ( register_store_target_regions_for_realization, ) @@ -255,38 +255,40 @@ IdentityMintStrategy, LocalRegionHandle, PartialSealResult, - PrefetchedServingBinding, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffMemberFailure, + PrefetchHandoffSet, PrefetchRetentionPolicy, PublicDiskSourceHandle, PublishedModelVersion, + RealizationTarget, + RealizationTargetSet, RegionMemoryKind, RepresentationPublishContract, RepresentationPublishSpec, + RuntimeAdmissionFacts, + RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, + 
RuntimeArtifactPolicy, + RuntimeArtifactPolicyInput, + RuntimeBindingMemberRef, + RuntimeBindingReadiness, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceKind, + RuntimeBindingSourceMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeBindingSourceReuseMode, + RuntimePublicationSubject, + RuntimeRealizationSpecCacheEntry, + RuntimeSupportLevel, + RuntimeTopologyRef, SealAssemblyResult, - ServingAdmissionFacts, - ServingArtifactManifest, - ServingBindingMemberRef, - ServingBindingReadiness, - ServingBindingResolvedLayout, - ServingBindingResolvedSpecCacheEntry, - ServingBindingSetTarget, - ServingBindingSourceKind, - ServingBindingSourceMemberRef, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingSourceReuseMode, - ServingBindingTarget, - ServingBuildIntent, - ServingPublicationSubject, - ServingRuntimePolicy, - ServingRuntimePolicyInput, - ServingSupportLevel, SourceBoundCapability, SourceBoundPlanDiagnostics, VramRegionHandle, build_serving_manifest_ref, - coerce_serving_runtime_policy, + coerce_runtime_artifact_policy, parse_serving_manifest_ref, ) from tensorcast.types import ( @@ -875,36 +877,29 @@ def _decode_published_model_version_from_response( total_size=int(artifact.total_size), id_kind=_artifact_id_kind_from_proto(artifact.id_kind, artifact.artifact_id), ) + serving_descriptor = None + if payload.HasField("serving_artifact") and payload.serving_artifact.artifact_id: + serving_artifact = payload.serving_artifact + serving_descriptor = TypedArtifactDescriptor( + artifact_id=str(serving_artifact.artifact_id), + index_multihash=str(serving_artifact.index_multihash or "") or None, + data_multihash=str(serving_artifact.data_multihash or "") or None, + schema_version=str(serving_artifact.schema_version or "") or None, + encoding=str(serving_artifact.encoding or "") or None, + total_size=int(serving_artifact.total_size), + id_kind=_artifact_id_kind_from_proto( + serving_artifact.id_kind, + 
serving_artifact.artifact_id, + ), + ) return PublishedModelVersion( assembly_id=assembly_id, source_artifact_id=descriptor.artifact_id, source_descriptor=descriptor, serving_artifact_id=( - str(payload.serving_artifact.artifact_id) - if payload.HasField("serving_artifact") - and payload.serving_artifact.artifact_id - else None - ), - serving_descriptor=( - TypedArtifactDescriptor( - artifact_id=str(payload.serving_artifact.artifact_id), - index_multihash=str(payload.serving_artifact.index_multihash or "") - or None, - data_multihash=str(payload.serving_artifact.data_multihash or "") - or None, - schema_version=str(payload.serving_artifact.schema_version or "") - or None, - encoding=str(payload.serving_artifact.encoding or "") or None, - total_size=int(payload.serving_artifact.total_size), - id_kind=_artifact_id_kind_from_proto( - payload.serving_artifact.id_kind, - payload.serving_artifact.artifact_id, - ), - ) - if payload.HasField("serving_artifact") - and payload.serving_artifact.artifact_id - else None + serving_descriptor.artifact_id if serving_descriptor is not None else None ), + serving_descriptor=serving_descriptor, source_version_key=str(payload.source_version_key or "") or None, serving_version_key=str(payload.serving_version_key or "") or None, representation_contract_hash=( @@ -1789,7 +1784,7 @@ def register_pure_transform_publication( self, tensors: TensorDict, *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -1804,8 +1799,8 @@ def register_pure_transform_publication( serving_version_key: str | None = None, logical_topology_json: str | None = None, serving_manifest_ref: str | None = None, - ) -> RegisteredServingPublication: - prepared = prepare_pure_transform_serving_registration( + ) -> RegisteredRuntimeArtifactPublication: + prepared = prepare_pure_transform_runtime_registration( build_intent=build_intent, source_artifact=source_artifact, 
tensors=tensors, @@ -1829,7 +1824,7 @@ def register_pure_transform_publication( logical_topology_json=logical_topology_json, serving_manifest_ref=prepared.serving_manifest_ref, ) - return RegisteredServingPublication( + return RegisteredRuntimeArtifactPublication( registered_artifact=registered_artifact, prepared_registration=prepared, publication=publication, @@ -1839,7 +1834,7 @@ def complete_pure_transform_publication( self, tensors: TensorDict, *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -1909,7 +1904,7 @@ def complete_pure_transform_publication_from_binding( self, binding: Binding | SealedBindingValue, *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -1933,7 +1928,7 @@ def complete_pure_transform_publication_from_binding( authoritative_canonical_index = _build_bound_publication_canonical_index( resolved_binding.layout ) - prepared = prepare_pure_transform_serving_registration( + prepared = prepare_pure_transform_runtime_registration( build_intent=build_intent, source_artifact=source_artifact, tensors=dict(resolved_binding.tensors), @@ -1969,7 +1964,7 @@ def complete_pure_transform_publication_from_binding( def _complete_registered_representation_publication( self, *, - publication: RegisteredServingPublication, + publication: RegisteredRuntimeArtifactPublication, contract_family: AssemblyContractFamily | str | None = None, source_artifact: Artifact | RegisteredArtifact @@ -2183,8 +2178,8 @@ def complete_binding_finalize_publication_from_binding( self, binding: Binding | SealedBindingValue, *, - build_intent: ServingBuildIntent, - admission_facts: ServingAdmissionFacts, + build_intent: RuntimeArtifactBuildIntent, + admission_facts: RuntimeAdmissionFacts, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -2209,7 +2204,7 @@ def 
complete_binding_finalize_publication_from_binding( authoritative_canonical_index = _build_bound_publication_canonical_index( resolved_binding.layout ) - prepared = prepare_binding_finalize_serving_registration( + prepared = prepare_binding_finalize_runtime_registration( build_intent=build_intent, tensors=dict(resolved_binding.tensors), representation_contract_hash=representation_contract_hash, @@ -3078,6 +3073,7 @@ def _artifact_from_disk_metadata( event_name: str, resolution_mode: str, trusted_content_artifact_id: str | None = None, + source_subject: object | None = None, ) -> Artifact: if not artifact_id: raise ArtifactError( @@ -3133,6 +3129,7 @@ def _artifact_from_disk_metadata( canonical_index_bytes=canonical_index_bytes or None, canonical_index=canonical_index, generation=generation, + source_subject=source_subject, ) def import_from_disk( @@ -3365,6 +3362,7 @@ def from_disk( event_name="store.from_disk.summary", resolution_mode="attested_mounted_source", trusted_content_artifact_id=source.trusted_content_artifact_id, + source_subject=source, ) def resolve_public_disk_source( @@ -3896,7 +3894,7 @@ def register_piece( def register_pure_transform_publication( tensors: TensorDict, *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -3911,7 +3909,7 @@ def register_pure_transform_publication( serving_version_key: str | None = None, logical_topology_json: str | None = None, serving_manifest_ref: str | None = None, -) -> RegisteredServingPublication: +) -> RegisteredRuntimeArtifactPublication: return _coerce_store().register_pure_transform_publication( tensors, build_intent=build_intent, @@ -3931,7 +3929,7 @@ def register_pure_transform_publication( def complete_pure_transform_publication( tensors: TensorDict, *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -3982,7 
+3980,7 @@ def complete_pure_transform_publication( def complete_pure_transform_publication_from_binding( binding: Binding | SealedBindingValue, *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -4019,8 +4017,8 @@ def complete_pure_transform_publication_from_binding( def complete_binding_finalize_publication_from_binding( binding: Binding | SealedBindingValue, *, - build_intent: ServingBuildIntent, - admission_facts: ServingAdmissionFacts, + build_intent: RuntimeArtifactBuildIntent, + admission_facts: RuntimeAdmissionFacts, source_artifact: Artifact | RegisteredArtifact | CanonicalIndex @@ -4645,12 +4643,13 @@ def realize_into_binding( "RealizationTargetSetReport", "RealizationTargetPlan", "PrefetchRetentionPolicy", + "PrefetchHandoff", + "PrefetchHandoffMemberFailure", + "PrefetchHandoffSet", "PrefetchedReplica", - "PrefetchedServingBinding", - "PrefetchedServingBindingSet", "PartialSealResult", "PublicDiskSourceHandle", - "PreparedServingRegistration", + "PreparedRuntimeArtifactRegistration", "PublishedModelVersion", "RegionMemoryKind", "ExecutionDiagnostics", @@ -4658,37 +4657,41 @@ def realize_into_binding( "HashBackend", "HashLocation", "IdentityMintStrategy", - "RegisteredServingPublication", + "RegisteredRuntimeArtifactPublication", "RegisteredArtifact", + "RealizationTarget", + "RealizationTargetSet", "RepresentationAdmissionPlan", "RepresentationPublishContract", "RepresentationPublishSpec", + "RuntimeArtifactBuildIntent", + "RuntimeArtifactManifest", + "RuntimeArtifactPolicy", + "RuntimeArtifactPolicyInput", "SourceBoundCapability", - "ServingPublicationSubject", + "RuntimePublicationSubject", "ReplicaInfo", "RetryPolicy", "ResolvedArtifactSelection", "SERVING_MANIFEST_TENSOR_NAME", "SealedBindingValue", "StagedBindingValue", - "ServingArtifactManifest", - "ServingAdmissionFacts", - "ServingBindingMemberRef", - "ServingBindingReadiness", - 
"ServingBindingResolvedLayout", - "ServingBindingResolvedSpecCacheEntry", - "ServingBindingSetTarget", - "ServingBindingSourceKind", - "ServingBindingSourceMemberRef", - "ServingBindingSourceRef", - "ServingBindingSourceReuseDecision", - "ServingBindingSourceReuseMode", - "ServingBindingTarget", - "ServingBindingSpecCacheRecord", + "RuntimeAdmissionFacts", + "RuntimeBindingMemberRef", + "RuntimeBindingReadiness", + "RuntimeBindingResolvedLayout", + "RuntimeRealizationSpecCacheEntry", + "RuntimeBindingSourceKind", + "RuntimeBindingSourceMemberRef", + "RuntimeBindingSourceRef", + "RuntimeBindingSourceReuseDecision", + "RuntimeBindingSourceReuseMode", + "RuntimeTopologyRef", + "RuntimeRealizationSpecCacheRecord", "REFERENCE_RUNTIME", - "ReferenceServingAcquireResult", - "ReferenceServingResolvedSpec", - "ReferenceServingTensorSpec", + "ReferenceRuntimeAcquireResult", + "ReferenceRuntimeResolvedSpec", + "ReferenceRuntimeTensorSpec", "artifact_realization_profile_payload", "artifact_realization_report_to_dict", "acquire_reference_binding", @@ -4721,19 +4724,17 @@ def realize_into_binding( "read_matching_resolved_spec_cache_entry", "read_resolved_spec_cache_entry", "release_reference_acquire", - "serving_binding_spec_cache_root", + "runtime_realization_spec_cache_root", "target_set_report_for_retained_bindings", "target_set_strategy_plan_for", "target_from_reference_cache_record", - "unpack_prefetched_serving_binding", - "unpack_prefetched_serving_binding_set", + "unpack_prefetch_handoff", + "unpack_prefetch_handoff_set", "write_resolved_spec_cache_entry", "write_reference_resolved_spec_cache_entry", - "ServingBuildIntent", "SERVING_BUILD_DIGEST_VERSION", - "ServingRuntimePolicy", - "ServingRuntimePolicyInput", - "ServingSupportLevel", + "RuntimeSupportLevel", + "coerce_runtime_artifact_policy", "StoreCapabilities", "Store", "StoreOptions", @@ -4751,8 +4752,8 @@ def realize_into_binding( "build_binding_finalize_admission_facts", 
"build_binding_finalize_publication_bundle", "build_owned_layout", - "build_serving_publication_bundle", - "build_serving_publication_bundle_from_registered_artifact", + "build_runtime_artifact_publication_bundle", + "build_runtime_artifact_publication_bundle_from_registered_artifact", "build_pure_transform_publication_bundle", "build_pure_transform_publication_bundle_from_registered_artifact", "build_pure_transform_publication_spec", @@ -4768,22 +4769,21 @@ def realize_into_binding( "complete_structural_representation_publish_attempt", "compute_pure_transform_representation_contract_hash", "build_serving_manifest_ref", - "coerce_serving_runtime_policy", - "compute_serving_tensor_schema_hash", - "count_canonical_serving_tensors", - "prepare_binding_finalize_serving_registration", - "prepare_serving_registration", - "prepare_pure_transform_serving_registration", + "compute_runtime_artifact_tensor_schema_hash", + "count_canonical_runtime_tensors", + "prepare_binding_finalize_runtime_registration", + "prepare_runtime_artifact_registration", + "prepare_pure_transform_runtime_registration", "parse_serving_manifest_ref", "TargetTensors", "PersistenceStatusResult", "PersistenceShardStatus", - "ServingBindingSpecCacheGroupIndex", - "ServingBindingSpecCacheRecord", + "RuntimeRealizationSpecCacheGroupIndex", + "RuntimeRealizationSpecCacheRecord", "REFERENCE_RUNTIME", - "ReferenceServingAcquireResult", - "ReferenceServingResolvedSpec", - "ReferenceServingTensorSpec", + "ReferenceRuntimeAcquireResult", + "ReferenceRuntimeResolvedSpec", + "ReferenceRuntimeTensorSpec", "acquire_reference_binding", "build_reference_resolved_spec", "build_reference_target_layout", @@ -4795,10 +4795,10 @@ def realize_into_binding( "read_resolved_spec_cache_entry", "read_resolved_spec_cache_group_index", "release_reference_acquire", - "serving_binding_spec_cache_root", + "runtime_realization_spec_cache_root", "target_from_reference_cache_record", - "unpack_prefetched_serving_binding", - 
"unpack_prefetched_serving_binding_set", + "unpack_prefetch_handoff", + "unpack_prefetch_handoff_set", "write_resolved_spec_cache_entry", "write_resolved_spec_cache_group_index", "write_reference_resolved_spec_cache_entry", diff --git a/tensorcast/api/store/artifact.py b/tensorcast/api/store/artifact.py index 4e418496..9b69127f 100644 --- a/tensorcast/api/store/artifact.py +++ b/tensorcast/api/store/artifact.py @@ -103,6 +103,7 @@ retained_binding_lifecycle_plan_for, retained_binding_reports_for, risk_labels_for_target, + selection_report_fields, strategy_plan_for_execution, ) from tensorcast.api.store.retry import ( @@ -134,21 +135,27 @@ from tensorcast.proto.operation.v1 import operation_pb2 from tensorcast.types import ( GroupRealizationAcquireRef, - PrefetchedServingBinding, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffSet, PrefetchRetentionPolicy, - ServingBindingReadiness, - ServingBindingSetTarget, - ServingBindingSourceReuseDecision, - ServingBindingTarget, - ServingRuntimePolicy, - ServingRuntimePolicyInput, - coerce_serving_runtime_policy, + RealizationTarget, + RealizationTargetSet, + RuntimeArtifactPolicy, + RuntimeArtifactPolicyInput, + RuntimeBindingReadiness, + RuntimeBindingSourceReuseDecision, + coerce_runtime_artifact_policy, ) logger = logging.getLogger(__name__) +def _resolve_runtime_artifact_policy( + runtime_artifact_policy: RuntimeArtifactPolicyInput | None, +) -> RuntimeArtifactPolicy | None: + return coerce_runtime_artifact_policy(runtime_artifact_policy) + + def _has_validated_byte_artifact_profile(artifact_id: str) -> bool: if not is_byte_artifact_id(artifact_id): return False @@ -249,22 +256,22 @@ class PrefetchedReplica: report: ArtifactRealizationReport | None = None -ServingPrefetchResult = PrefetchedServingBinding | PrefetchedServingBindingSet +RuntimePrefetchResult = PrefetchHandoff | PrefetchHandoffSet def _parse_serving_prefetch_result_any( result: Any, -) -> ServingPrefetchResult: +) -> 
RuntimePrefetchResult: binding_result = operation_pb2.PrefetchServingBindingResult() if result.Is(binding_result.DESCRIPTOR): result.Unpack(binding_result) - return PrefetchedServingBinding.from_proto(binding_result) + return PrefetchHandoff.from_proto(binding_result) set_result = operation_pb2.PrefetchServingBindingSetResult() if result.Is(set_result.DESCRIPTOR): result.Unpack(set_result) - return PrefetchedServingBindingSet.from_proto(set_result) + return PrefetchHandoffSet.from_proto(set_result) raise ArtifactError( - "Serving prefetch operation did not return a typed serving binding result", + "Runtime prefetch operation did not return a typed prefetch handoff result", status_code="DATA_LOSS", retryable=False, ) @@ -272,13 +279,13 @@ def _parse_serving_prefetch_result_any( def _serving_prefetch_result_from_operation_response( response: operation_pb2.GetOperationResponse, -) -> ServingPrefetchResult: +) -> RuntimePrefetchResult: if response.status.HasField("result"): return _parse_serving_prefetch_result_any(response.status.result) if response.HasField("snapshot"): return _parse_serving_prefetch_result_any(response.snapshot) raise ArtifactError( - "Serving prefetch operation completed without result metadata", + "Runtime prefetch operation completed without result metadata", status_code="DATA_LOSS", retryable=False, ) @@ -293,11 +300,11 @@ def _digest_hex(label: str, payload: bytes) -> str: def _serving_target_layout_digest( - target: ServingBindingTarget | ServingBindingSetTarget, + target: RealizationTarget | RealizationTargetSet, *, target_bytes: bytes, ) -> str: - if isinstance(target, ServingBindingTarget): + if isinstance(target, RealizationTarget): return str(target.resolved_layout.target_layout_hash or "") or _digest_hex( "serving-target-layout", target_bytes, @@ -306,11 +313,11 @@ def _serving_target_layout_digest( def _serving_target_copy_plan_digest( - target: ServingBindingTarget | ServingBindingSetTarget, + target: RealizationTarget | 
RealizationTargetSet, *, target_bytes: bytes, ) -> str: - if isinstance(target, ServingBindingTarget): + if isinstance(target, RealizationTarget): digest = str(target.resolved_layout.spec_digest or "") if digest: return digest @@ -320,16 +327,43 @@ def _serving_target_copy_plan_digest( return _digest_hex("serving-target-copy-plan", target_bytes) +def _binding_layout_target_digest(binding_layout_id: str) -> str | None: + normalized = str(binding_layout_id or "").strip() + if not normalized: + return None + return f"binding-layout:{normalized}" + + +def _mapped_target_specs_from_layout( + layout: object | None, +) -> tuple[dict[str, object], ...]: + if layout is None: + return () + specs = getattr(layout, "dst_specs", None) + if specs is None: + return () + return tuple( + { + "name": str(getattr(spec, "name", "")), + "dtype": str(getattr(spec, "dtype", "")), + "shape": tuple(int(v) for v in getattr(spec, "shape", ())), + "stride": tuple(int(v) for v in getattr(spec, "stride", ())), + "logical_length": int(getattr(spec, "logical_length", 0) or 0), + } + for spec in specs + ) + + def _with_retained_binding_report( - result: ServingPrefetchResult, + result: RuntimePrefetchResult, *, selection: ResolvedArtifactSelection, - target: ServingBindingTarget | ServingBindingSetTarget, + target: RealizationTarget | RealizationTargetSet, target_bytes: bytes, operation_id: str, -) -> ServingPrefetchResult: +) -> RuntimePrefetchResult: retained_bindings = retained_binding_reports_for(result) - is_target_set = isinstance(result, PrefetchedServingBindingSet) + is_target_set = isinstance(result, PrefetchHandoffSet) target_plan = RealizationTargetPlan( kind="target_set" if is_target_set else "retained_binding", target_layout_digest=_serving_target_layout_digest( @@ -373,6 +407,7 @@ def _with_retained_binding_report( artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), 
envelope=envelope, target_plan=target_plan, strategy_plan=strategy_plan_for_execution(envelope=envelope), @@ -425,13 +460,13 @@ def _operation_status_from_proto( def _serving_target_source_reuse( - target: ServingBindingTarget | ServingBindingSetTarget, -) -> ServingBindingSourceReuseDecision: - if isinstance(target, ServingBindingTarget): + target: RealizationTarget | RealizationTargetSet, +) -> RuntimeBindingSourceReuseDecision: + if isinstance(target, RealizationTarget): return target.resolved_layout.source_reuse if not target.members: raise ArtifactError( - "Serving binding set members must use one source reuse decision", + "Realization target set members must use one source reuse decision", status_code="FAILED_PRECONDITION", retryable=False, ) @@ -441,7 +476,7 @@ def _serving_target_source_reuse( for member in target.members[1:] ): raise ArtifactError( - "Serving binding set members must use one source reuse decision", + "Realization target set members must use one source reuse decision", status_code="FAILED_PRECONDITION", retryable=False, ) @@ -756,9 +791,11 @@ def __init__( canonical_index_bytes: bytes | None = None, canonical_index: CanonicalIndex | None = None, generation: int | None = None, + key_generation: int | None = None, view_spec: ViewSpecBuildResult | None = None, view_metadata: ViewMetadataCache | None = None, view_depth: int = 0, + source_subject: Any | None = None, ) -> None: identifiers = [bool(artifact_id), bool(key)] if sum(identifiers) == 0: @@ -775,6 +812,7 @@ def __init__( self._view_spec = view_spec self._view_metadata = view_metadata self._view_depth = max(0, int(view_depth)) + self._source_subject = source_subject effective_index = ( view_metadata.selected_index if view_metadata is not None @@ -785,6 +823,7 @@ def __init__( entry.name: _meta_from_entry(entry) for entry in effective_index.entries } self._generation = generation + self._key_generation = key_generation self._store_ref = store_ref self._lock = threading.RLock() 
self._released = False @@ -861,11 +900,174 @@ def _realization_handle( emit_artifact_realization_profile_event(handle.report) return handle + def _model_runtime_request_facts( + self, + spec: ArtifactRealizationSpec, + runtime_context: Any | None, + ) -> tuple[ArtifactRealizationSpec, Any]: + from tensorcast.artifact_runtime.request_facts import ( + ModelRuntimeRequestFactsError, + resolve_model_runtime_request_facts, + ) + + try: + facts = resolve_model_runtime_request_facts( + spec=spec, + runtime_context=runtime_context, + ) + except ModelRuntimeRequestFactsError as exc: + raise ArtifactError( + str(exc), + status_code="INVALID_ARGUMENT", + retryable=False, + ) from exc + return cast(ArtifactRealizationSpec, facts.spec), facts.context + + def _store_bound_runtime_artifact_resolver(self) -> Any: + from tensorcast.artifact_runtime.artifact.resolver import ( + RuntimeArtifactResolver, + ) + from tensorcast.types import ( + SERVING_MANIFEST_TENSOR_NAME, + RuntimeArtifactManifest, + ) + + store, _runtime, _pipeline = self._require_components() + return RuntimeArtifactResolver( + manifest_tensor_name=SERVING_MANIFEST_TENSOR_NAME, + schema_version=int( + RuntimeArtifactManifest.model_fields["schema_version"].default + ), + open_artifact_fn=lambda ref: store.artifact(ref=ref), + ) + + def _execute_model_runtime_realization( + self, + spec: ArtifactRealizationSpec, + *, + runtime_host: Any | None, + runtime_context: Any | None, + runtime_resolver: Any | None, + profile_sink: Any | None, + ) -> ArtifactRealizationHandle: + if runtime_host is None: + raise ArtifactError( + "model_runtime realization requires runtime_host", + status_code="INVALID_ARGUMENT", + retryable=False, + ) + + from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration + + artifact_id = self._ensure_identified() + resolved_spec, context = self._model_runtime_request_facts( + spec, + runtime_context, + ) + resolved_runtime_resolver = runtime_resolver + if resolved_runtime_resolver is 
None and not artifact_id.startswith("msa1:"): + resolved_runtime_resolver = self._store_bound_runtime_artifact_resolver() + integration = ArtifactRuntimeIntegration( + resolver=resolved_runtime_resolver, + profile_sink=profile_sink, + host=runtime_host, + ) + from tensorcast.artifact_runtime.request_facts import ( + ModelRuntimeRequestFactsError, + ) + + try: + if artifact_id.startswith("msa1:"): + if self._source_subject is None: + raise ArtifactError( + "mounted-source model_runtime realization requires a " + "daemon-attested source handle; create the artifact with " + "tensorcast.from_disk(...)", + status_code="FAILED_PRECONDITION", + retryable=False, + ) + source_selection = self._resolve_model_runtime_source_selection( + artifact_id + ) + attachment = integration.realize_mounted_source_model_runtime( + artifact_ref=artifact_id, + source_subject=self._source_subject, + spec=resolved_spec, + context=context, + source_selection=source_selection, + materialization=resolved_spec.options, + ) + else: + source_selection = self._resolve_model_runtime_source_selection( + artifact_id + ) + attachment = integration.realize_model_runtime( + artifact_ref=artifact_id, + spec=resolved_spec, + context=context, + source_selection=source_selection, + runtime_artifact_policy=resolved_spec.runtime_artifact_policy, + materialization=resolved_spec.options, + ) + except ModelRuntimeRequestFactsError as exc: + raise ArtifactError( + str(exc), + status_code="INVALID_ARGUMENT", + retryable=False, + ) from exc + handle = getattr(attachment.state, "model_runtime_handle", None) + if not isinstance(handle, ArtifactRealizationHandle): + raise ArtifactError( + "model_runtime realization completed without a realization handle", + status_code="INTERNAL", + retryable=False, + ) + return handle + + def _resolve_model_runtime_source_selection( + self, + artifact_id: str, + ) -> ResolvedArtifactSelection | None: + if not artifact_id.startswith("msa1:"): + if ( + self._canonical_index_bytes is None 
+ and not self._model_runtime_can_resolve_artifact_index() + and self._view_spec is None + and self._view_metadata is None + ): + return None + return self._resolve_realization_selection() + canonical_index_bytes = self._canonical_index_bytes + if canonical_index_bytes is None and self._source_subject is not None: + canonical_index_bytes = bytes( + getattr(self._source_subject, "canonical_index_bytes", None) or b"" + ) + return resolve_artifact_selection( + artifact_id=artifact_id, + canonical_index_bytes=canonical_index_bytes, + generation_hint=( + self._key_generation + if self._key_generation is not None + else self._generation + ), + ) + + def _model_runtime_can_resolve_artifact_index(self) -> bool: + store = self._store_ref() if self._store_ref is not None else None + if store is None or bool(getattr(store, "closed", False)): + return False + runtime = getattr(store, "_runtime", None) + return callable(getattr(runtime, "ensure_client", None)) + def realize( self, spec: ArtifactRealizationSpec, *, ctx: CallContext | None = None, + runtime_host: Any | None = None, + runtime_context: Any | None = None, + runtime_resolver: Any | None = None, + profile_sink: Any | None = None, ) -> ArtifactRealizationHandle: if spec.target_kind == "tensor_dict": if spec.device is None: @@ -900,6 +1102,7 @@ def realize( artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), envelope=envelope, target_plan=target_plan, strategy_plan=strategy_plan_for_execution( @@ -975,6 +1178,7 @@ def realize( artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), envelope=envelope, target_plan=target_plan, strategy_plan=strategy_plan_for_execution( @@ -1014,17 +1218,48 @@ def realize( options=cast("GetArtifactOptions | None", spec.options), 
capacity_bytes=spec.capacity_bytes, publish=spec.publish, - serving_runtime_policy=cast( - ServingRuntimePolicyInput | None, - spec.serving_runtime_policy, + runtime_artifact_policy=cast( + RuntimeArtifactPolicyInput | None, + spec.runtime_artifact_policy, ), ctx=ctx, ) - binding_layout_id = str(getattr(binding, "binding_layout_id", "") or "") + binding_layout = getattr(binding, "layout", None) + layout_binding_id = str( + getattr(binding_layout, "binding_layout_id", "") or "" + ) + binding_layout_id = str( + getattr(binding, "binding_layout_id", "") or layout_binding_id + ) + mapped_view_id = None + copy_plan_digest = None + if spec.mapping is not None: + target_layout = getattr(binding_layout, "target_layout", None) + mapped_view_id = str(getattr(target_layout, "view_id", "") or "") + if not mapped_view_id: + target_specs = _mapped_target_specs_from_layout(binding_layout) + if target_specs: + mapped_view_id = compute_mapped_view_id_from_specs( + canonical_index_bytes=selection.canonical_index_bytes, + source_view_id=selection.view_id, + plan=normalize_copy_plan(cast(CopyPlan, spec.mapping)), + target_specs=target_specs, + ) + if not mapped_view_id: + raise ArtifactError( + "mapped owned binding realization requires mapped target " + "layout identity", + status_code="FAILED_PRECONDITION", + retryable=False, + ) + copy_plan_digest = mapped_view_id target_plan = RealizationTargetPlan( kind="binding_owned", device=spec.device, + target_layout_digest=_binding_layout_target_digest(binding_layout_id), binding_layout_id=binding_layout_id, + mapped_view_id=mapped_view_id, + copy_plan_digest=copy_plan_digest, ) envelope = envelope_for_binding( binding, @@ -1065,9 +1300,9 @@ def realize( packing=spec.packing, options=cast("GetArtifactOptions | None", spec.options), publish=spec.publish, - serving_runtime_policy=cast( - ServingRuntimePolicyInput | None, - spec.serving_runtime_policy, + runtime_artifact_policy=cast( + RuntimeArtifactPolicyInput | None, + 
spec.runtime_artifact_policy, ), ctx=ctx, ) @@ -1153,7 +1388,11 @@ def realize( selection = resolve_artifact_selection( artifact_id=source_artifact_id, canonical_index_bytes=canonical_index_bytes, - generation_hint=self._generation, + generation_hint=( + self._key_generation + if self._key_generation is not None + else self._generation + ), ) target_plan = RealizationTargetPlan( kind="mounted_source", @@ -1182,10 +1421,12 @@ def realize( promote_fn=lambda: promoted, ) if spec.target_kind == "model_runtime": - raise ArtifactError( - "model_runtime realization is not lowered through Artifact.realize yet", - status_code="UNIMPLEMENTED", - retryable=False, + return self._execute_model_runtime_realization( + spec, + runtime_host=runtime_host, + runtime_context=runtime_context, + runtime_resolver=runtime_resolver, + profile_sink=profile_sink, ) raise ArtifactError( f"Unsupported realization target kind: {spec.target_kind}", @@ -1198,7 +1439,7 @@ def realize_async( spec: ArtifactRealizationSpec, *, ctx: CallContext | None = None, - ) -> Operation[PrefetchedReplica] | Operation[ServingPrefetchResult]: + ) -> Operation[PrefetchedReplica] | Operation[RuntimePrefetchResult]: if spec.target_kind == "retained_replica": if spec.device is None: raise ArtifactError( @@ -1218,21 +1459,19 @@ def realize_async( status_code="INVALID_ARGUMENT", retryable=False, ) - if isinstance(spec.target, ServingBindingSetTarget): + if isinstance(spec.target, RealizationTargetSet): raise ArtifactError( - "ServingBindingSetTarget requires target_set realization", + "RealizationTargetSet requires target_set realization", status_code="INVALID_ARGUMENT", retryable=False, ) readiness = ( - cast(ServingBindingReadiness, spec.readiness) + cast(RuntimeBindingReadiness, spec.readiness) if spec.readiness is not None - else "serving_local_ready" + else "runtime_local_ready" ) return self._execute_prefetch( - target=cast( - ServingBindingTarget | ServingBindingSetTarget, spec.target - ), + 
target=cast(RealizationTarget | RealizationTargetSet, spec.target), readiness=readiness, retention=cast(PrefetchRetentionPolicy | None, spec.retention), ctx=ctx, @@ -1244,16 +1483,16 @@ def realize_async( status_code="INVALID_ARGUMENT", retryable=False, ) - if not isinstance(spec.target, ServingBindingSetTarget): + if not isinstance(spec.target, RealizationTargetSet): raise ArtifactError( - "target_set realization requires ServingBindingSetTarget", + "target_set realization requires RealizationTargetSet", status_code="INVALID_ARGUMENT", retryable=False, ) readiness = ( - cast(ServingBindingReadiness, spec.readiness) + cast(RuntimeBindingReadiness, spec.readiness) if spec.readiness is not None - else "serving_local_ready" + else "runtime_local_ready" ) return self._execute_prefetch( target=spec.target, @@ -1560,7 +1799,7 @@ def bind( options: GetArtifactOptions | None = None, capacity_bytes: int | None = None, publish: bool = False, - serving_runtime_policy: ServingRuntimePolicyInput | None = None, + runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None, ctx: CallContext | None = None, ) -> Binding: handle = self.realize( @@ -1571,7 +1810,7 @@ def bind( options=options, capacity_bytes=capacity_bytes, publish=publish, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ), ctx=ctx, ) @@ -1586,7 +1825,7 @@ def _execute_bind_owned( options: GetArtifactOptions | None = None, capacity_bytes: int | None = None, publish: bool = False, - serving_runtime_policy: ServingRuntimePolicyInput | None = None, + runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None, ctx: CallContext | None = None, ) -> Binding: """Allocate daemon-owned target tensors, fill from this artifact, and return a Binding.""" @@ -1641,8 +1880,8 @@ def _execute_bind_owned( packing=packing, options=options, publish=publish, - serving_runtime_policy=coerce_serving_runtime_policy( - serving_runtime_policy + 
runtime_artifact_policy=_resolve_runtime_artifact_policy( + runtime_artifact_policy ), ctx=ctx, ) @@ -1655,7 +1894,7 @@ def bind_into( packing: str = "byte_space", options: GetArtifactOptions | None = None, publish: bool = False, - serving_runtime_policy: ServingRuntimePolicyInput | None = None, + runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None, ctx: CallContext | None = None, ) -> Binding: handle = self.realize( @@ -1665,7 +1904,7 @@ def bind_into( packing=packing, options=options, publish=publish, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ), ctx=ctx, ) @@ -1679,10 +1918,13 @@ def _execute_bind_into( packing: str = "byte_space", options: GetArtifactOptions | None = None, publish: bool = False, - serving_runtime_policy: ServingRuntimePolicyInput | None = None, + runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None, ctx: CallContext | None = None, ) -> Binding: """Adopt user-owned CUDA tensors, fill once, and return a Binding.""" + resolved_runtime_artifact_policy = _resolve_runtime_artifact_policy( + runtime_artifact_policy + ) if mapping is not None: return self._bind_into_mapped( target_tensors=target_tensors, @@ -1690,9 +1932,7 @@ def _execute_bind_into( packing=packing, options=options, publish=publish, - serving_runtime_policy=coerce_serving_runtime_policy( - serving_runtime_policy - ), + runtime_artifact_policy=resolved_runtime_artifact_policy, ctx=ctx, ) store, runtime, pipeline = self._require_components() @@ -1823,9 +2063,7 @@ def _execute_bind_into( target_layout=region_layout.layout, device_uuid=device_uuid_for(device_id), source_policy=source_policy, - serving_runtime_policy=coerce_serving_runtime_policy( - serving_runtime_policy - ), + runtime_artifact_policy=resolved_runtime_artifact_policy, operation_id=operation_id, group_realization=ctx.group_realization if ctx is not None @@ -1930,7 +2168,7 @@ def _bind_into_mapped( packing: str, options: GetArtifactOptions | 
None, publish: bool, - serving_runtime_policy: ServingRuntimePolicy | None, + runtime_artifact_policy: RuntimeArtifactPolicy | None, ctx: CallContext | None, ) -> Binding: store, runtime, pipeline = self._require_components() @@ -2076,7 +2314,7 @@ def _bind_into_mapped( target_layout=region_layout.layout, device_uuid=device_uuid_for(device_id), source_policy=source_policy, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, copy_plan=copy_plan, dst_tensors=target_tensors, operation_id=operation_id, @@ -2216,7 +2454,7 @@ def _bind_owned( packing: str, options: GetArtifactOptions | None, publish: bool, - serving_runtime_policy: ServingRuntimePolicy | None, + runtime_artifact_policy: RuntimeArtifactPolicy | None, ctx: CallContext | None, ) -> Binding: store, runtime, _ = self._require_components() @@ -2374,7 +2612,7 @@ def _bind_owned( device_uuid=device_uuid_for(device_id), binding_layout_id=owner_layout.binding_layout_id, source_policy=source_policy, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, copy_plan=copy_plan_proto, dst_specs=dst_specs, operation_id=operation_id, @@ -2577,24 +2815,24 @@ async def tensor_dict_async( def _prefetch_serving_binding( self, *, - target: ServingBindingTarget | ServingBindingSetTarget, - readiness: ServingBindingReadiness, + target: RealizationTarget | RealizationTargetSet, + readiness: RuntimeBindingReadiness, retention: PrefetchRetentionPolicy | None, ctx: CallContext | None, - ) -> Operation[ServingPrefetchResult]: + ) -> Operation[RuntimePrefetchResult]: artifact_id = self._ensure_identified() _, runtime, _ = self._require_components() resolved_selection = self._resolve_realization_selection() selection = resolved_selection.proto source_reuse = _serving_target_source_reuse(target) - if source_reuse.mode in {"serving_transform_required", "unsupported"}: + if source_reuse.mode in {"runtime_transform_required", "unsupported"}: reason 
= source_reuse.reason or ( - "source-to-target serving transform requires a topology-scoped executor" - if source_reuse.mode == "serving_transform_required" - else "serving binding source is unsupported" + "source-to-target runtime transform requires a topology-scoped executor" + if source_reuse.mode == "runtime_transform_required" + else "runtime binding source is unsupported" ) raise ArtifactError( - f"serving binding prefetch rejected before allocation: {reason}", + f"runtime binding prefetch rejected before allocation: {reason}", status_code="FAILED_PRECONDITION", retryable=False, ) @@ -2663,7 +2901,7 @@ def _prefetch_serving_binding( def _result_factory( operation_response: operation_pb2.GetOperationResponse, - ) -> ServingPrefetchResult: + ) -> RuntimePrefetchResult: return _with_retained_binding_report( _serving_prefetch_result_from_operation_response(operation_response), selection=resolved_selection, @@ -2689,12 +2927,12 @@ def prefetch( self, *, device: torch.device | str | int | None = None, - target: ServingBindingTarget | ServingBindingSetTarget | None = None, - readiness: ServingBindingReadiness = "serving_local_ready", + target: RealizationTarget | RealizationTargetSet | None = None, + readiness: RuntimeBindingReadiness = "runtime_local_ready", retention: PrefetchRetentionPolicy | None = None, ctx: CallContext | None = None, options: GetArtifactOptions | None = None, - ) -> Operation[PrefetchedReplica] | Operation[ServingPrefetchResult]: + ) -> Operation[PrefetchedReplica] | Operation[RuntimePrefetchResult]: if target is not None: if device is not None: raise ArtifactError( @@ -2708,7 +2946,7 @@ def prefetch( readiness=readiness, retention=retention, ) - if isinstance(target, ServingBindingSetTarget) + if isinstance(target, RealizationTargetSet) else ArtifactRealizationSpec.retained_binding( target=target, readiness=readiness, @@ -2735,12 +2973,12 @@ def _execute_prefetch( self, *, device: torch.device | str | int | None = None, - target: 
ServingBindingTarget | ServingBindingSetTarget | None = None, - readiness: ServingBindingReadiness = "serving_local_ready", + target: RealizationTarget | RealizationTargetSet | None = None, + readiness: RuntimeBindingReadiness = "runtime_local_ready", retention: PrefetchRetentionPolicy | None = None, ctx: CallContext | None = None, options: GetArtifactOptions | None = None, - ) -> Operation[PrefetchedReplica] | Operation[ServingPrefetchResult]: + ) -> Operation[PrefetchedReplica] | Operation[RuntimePrefetchResult]: from tensorcast.api._config import GetArtifactOptions artifact_id = self._ensure_identified() @@ -2868,13 +3106,7 @@ def _execute_prefetch( retryable=False, ) - source: str | None = None - if payload.source == store_daemon_pb2.MATERIALIZATION_SOURCE_P2P: - source = "p2p" - elif payload.source == store_daemon_pb2.MATERIALIZATION_SOURCE_DISK: - source = "disk" - elif payload.source == store_daemon_pb2.MATERIALIZATION_SOURCE_LOCAL_REPLICA: - source = "local" + source = materialization_source_label(payload.source) target_plan = RealizationTargetPlan( kind="retained_replica", @@ -2896,6 +3128,7 @@ def _execute_prefetch( artifact_profile=resolved_selection.artifact_profile, authority_scope=resolved_selection.authority_scope, generation_hint=resolved_selection.generation_hint, + **selection_report_fields(resolved_selection), envelope=envelope, target_plan=target_plan, strategy_plan=strategy_plan_for_execution( @@ -3203,7 +3436,11 @@ def _resolve_realization_selection( tensor_names=requested_names, view_subset_hash=inputs.view_subset_hash, view_index_hint=view_index_hint, - generation_hint=self._generation, + generation_hint=( + self._key_generation + if self._key_generation is not None + else self._generation + ), allow_view_id_without_spec=bool( inputs.view_id_hint and not (view_spec_proto is not None and view_spec_proto.tensors) @@ -3419,9 +3656,19 @@ def _ensure_identified(self) -> str: if self._artifact_id: return self._artifact_id if self._key_hint: - 
artifact_id, _disk_path = runtime.resolve_key_mapping_cached( + resolved_mapping = runtime.resolve_key_mapping_cached( key=self._key_hint ) + if isinstance(resolved_mapping, tuple): + artifact_id = resolved_mapping[0] + generation = ( + int(resolved_mapping[2]) + if len(resolved_mapping) > 2 and resolved_mapping[2] is not None + else None + ) + else: + artifact_id = getattr(resolved_mapping, "artifact_id", None) + generation = getattr(resolved_mapping, "generation", None) if not artifact_id: raise ArtifactError( f"Artifact key '{self._key_hint}' is not mapped", @@ -3429,6 +3676,8 @@ def _ensure_identified(self) -> str: retryable=False, ) self._artifact_id = artifact_id + if self._key_generation is None and generation is not None: + self._key_generation = int(generation) return artifact_id raise ArtifactError( "Artifact handle missing identity", @@ -3737,6 +3986,7 @@ def _derive_view( view_spec=self._view_spec, view_metadata=self._view_metadata, view_depth=self._view_depth, + source_subject=self._source_subject, ) base_index = self._effective_index() entry_shapes = {entry.name: tuple(entry.shape) for entry in base_index.entries} @@ -3762,9 +4012,11 @@ def _derive_view( canonical_index_bytes=self._canonical_index_bytes, canonical_index=self._canonical_index, generation=self._generation, + key_generation=self._key_generation, view_spec=composed_spec, view_metadata=view_cache, view_depth=depth, + source_subject=self._source_subject, ) def _hydrate_from_cache_entry(self, entry: ArtifactCacheEntry) -> None: diff --git a/tensorcast/api/store/binding.py b/tensorcast/api/store/binding.py index 52e5bfc9..d53a0b52 100644 --- a/tensorcast/api/store/binding.py +++ b/tensorcast/api/store/binding.py @@ -33,9 +33,10 @@ GroupRealizationAcquireRef, PartialSealResult, PublicDiskSourceHandle, - ServingRuntimePolicyInput, + RuntimeArtifactPolicy, + RuntimeArtifactPolicyInput, SourceBoundPlanDiagnostics, - coerce_serving_runtime_policy, + coerce_runtime_artifact_policy, ) if 
TYPE_CHECKING: @@ -124,6 +125,12 @@ def _reject_live_swap_group_realization(ctx: CallContext | None) -> None: ) +def _resolve_runtime_artifact_policy( + runtime_artifact_policy: RuntimeArtifactPolicyInput | None, +) -> RuntimeArtifactPolicy | None: + return coerce_runtime_artifact_policy(runtime_artifact_policy) + + def _clone_view_spec( view_spec: common_pb2.ViewSpec | None, ) -> common_pb2.ViewSpec | None: @@ -682,7 +689,7 @@ def swap( *, options: "GetArtifactOptions | None" = None, publish: bool = False, - serving_runtime_policy: ServingRuntimePolicyInput | None = None, + runtime_artifact_policy: RuntimeArtifactPolicyInput | None = None, activate_key: str | None = None, expected_active_artifact_id: str | None = None, expected_active_generation: int | None = None, @@ -709,8 +716,8 @@ def swap( artifact, options=options, publish=publish, - serving_runtime_policy=coerce_serving_runtime_policy( - serving_runtime_policy + runtime_artifact_policy=_resolve_runtime_artifact_policy( + runtime_artifact_policy ), wait=wait, drain_timeout_s=drain_timeout_s, diff --git a/tensorcast/api/store/inplace_slot.py b/tensorcast/api/store/inplace_slot.py index bef86a23..e97679ed 100644 --- a/tensorcast/api/store/inplace_slot.py +++ b/tensorcast/api/store/inplace_slot.py @@ -47,7 +47,7 @@ ) from tensorcast.proto.common.v1 import common_pb2 from tensorcast.proto.daemon.v2 import store_daemon_pb2 -from tensorcast.types import ServingRuntimePolicy +from tensorcast.types import RuntimeArtifactPolicy logger = logging.getLogger(__name__) @@ -589,7 +589,7 @@ def swap( *, options: GetArtifactOptions | None = None, publish: bool = False, - serving_runtime_policy: ServingRuntimePolicy | None = None, + runtime_artifact_policy: RuntimeArtifactPolicy | None = None, wait: bool = True, drain_timeout_s: float | None = None, ctx: CallContext | None = None, @@ -690,7 +690,7 @@ def swap( target_layout=region_layout.layout, device_uuid=device_uuid_for(self._device_id), source_policy=source_policy, - 
serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, copy_plan=self._copy_plan, dst_tensors=self._tensors, operation_id=operation_id, @@ -822,7 +822,7 @@ def swap( target_layout=region_layout.layout, device_uuid=device_uuid_for(self._device_id), source_policy=source_policy, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, operation_id=operation_id, timeout_s=rpc_timeout_s if rpc_timeout_s is not None else 600.0, ) diff --git a/tensorcast/api/store/materialization.py b/tensorcast/api/store/materialization.py index 8a42084b..013dcddd 100644 --- a/tensorcast/api/store/materialization.py +++ b/tensorcast/api/store/materialization.py @@ -1993,7 +1993,7 @@ def _materialize_payload( if isinstance(resolved_mapping, tuple): resolved_artifact_id = resolved_mapping[0] else: - resolved_artifact_id = resolved_mapping + resolved_artifact_id = resolved_mapping.artifact_id except Exception: # noqa: BLE001 logger.exception( "store.materialize.key_mapping_prefetch_failed", diff --git a/tensorcast/api/store/owned_binding_slot.py b/tensorcast/api/store/owned_binding_slot.py index a019138c..052fb7e6 100644 --- a/tensorcast/api/store/owned_binding_slot.py +++ b/tensorcast/api/store/owned_binding_slot.py @@ -52,8 +52,8 @@ ExecutionDiagnostics, GroupRealizationAcquireRef, PublicDiskSourceHandle, + RuntimeArtifactPolicy, ServerConfig, - ServingRuntimePolicy, SourceBoundCapability, SourceBoundPlanDiagnostics, ) @@ -342,7 +342,7 @@ def _build_source_execution_contract( if policy_mode is None and explicit_collective_group is not None: from tensorcast.api._config import CollectivePolicyMode - policy_mode = CollectivePolicyMode.REQUIRE_COLLECTIVE + policy_mode = CollectivePolicyMode.COLLECTIVE_FIRST if str(getattr(policy_mode, "value", policy_mode) or "") == "disable_collective": if explicit_collective_group is not None: @@ -1262,7 +1262,7 @@ def swap( *, options: GetArtifactOptions | None = None, 
publish: bool = False, - serving_runtime_policy: ServingRuntimePolicy | None = None, + runtime_artifact_policy: RuntimeArtifactPolicy | None = None, wait: bool = True, drain_timeout_s: float | None = None, ctx: CallContext | None = None, @@ -1304,7 +1304,7 @@ def swap( source_policy=source_policy, execution_topology=execution_topology, collective_policy=collective_policy, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, operation_id=operation_id, timeout_s=rpc_timeout_s if rpc_timeout_s is not None else 600.0, ) diff --git a/tensorcast/api/store/serving_builder.py b/tensorcast/api/store/publication_builder.py similarity index 83% rename from tensorcast/api/store/serving_builder.py rename to tensorcast/api/store/publication_builder.py index 8404b020..3d09cca2 100644 --- a/tensorcast/api/store/serving_builder.py +++ b/tensorcast/api/store/publication_builder.py @@ -1,4 +1,9 @@ # Copyright (c) 2026, TensorCast Team. +"""Build serving-manifest publication payloads for runtime artifacts. + +The serving names in this module are intentional publication/manifest ABI +terms. Do not add runtime session, resolver, or source-authority behavior here. 
+""" from __future__ import annotations @@ -32,11 +37,11 @@ PureTransformPublicationSpec, RepresentationPublishContract, RepresentationPublishSpec, - ServingAdmissionFacts, - ServingArtifactManifest, - ServingBuildIntent, - ServingPublicationSubject, - ServingSupportLevel, + RuntimeAdmissionFacts, + RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, + RuntimePublicationSubject, + RuntimeSupportLevel, build_serving_manifest_ref, ) @@ -47,28 +52,28 @@ @dataclass(frozen=True, slots=True) -class PreparedServingRegistration: +class PreparedRuntimeArtifactRegistration: tensors: dict[str, torch.Tensor] serving_manifest_ref: str manifest_tensor_name: str - serving_manifest: ServingArtifactManifest + serving_manifest: RuntimeArtifactManifest serving_manifest_bytes: bytes representation_contract_hash: str canonical_index: CanonicalIndex @dataclass(frozen=True, slots=True) -class ServingManifestCarrier: +class RuntimeArtifactManifestCarrier: serving_manifest_ref: str manifest_tensor_name: str - serving_manifest: ServingArtifactManifest + serving_manifest: RuntimeArtifactManifest serving_manifest_bytes: bytes @dataclass(frozen=True, slots=True) -class RegisteredServingPublication: +class RegisteredRuntimeArtifactPublication: registered_artifact: RegisteredArtifact - prepared_registration: PreparedServingRegistration + prepared_registration: PreparedRuntimeArtifactRegistration publication: RepresentationPublishSpec @@ -77,10 +82,6 @@ class RegisteredServingPublication: _PURE_TRANSFORM_TARGET_REALIZATION_KIND = "artifact_publishable" -def _canonical_json_bytes(payload: object) -> bytes: - return json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") - - def _pad_manifest_carrier_bytes(payload: bytes) -> bytes: align = int(DEFAULT_ALIGN) if align <= 1: @@ -97,7 +98,7 @@ def _manifest_byte_mismatch_message( *, existing_bytes: bytes, expected_bytes: bytes, - expected_manifest: ServingArtifactManifest, + expected_manifest: RuntimeArtifactManifest, ) -> str: 
def _sha(data: bytes) -> str: return hashlib.sha256(data).hexdigest() @@ -127,7 +128,7 @@ def _summarize_value(value: object) -> object: "first_diff_offset": _first_diff_offset(existing_bytes, expected_bytes), } try: - existing_manifest = ServingArtifactManifest.from_bytes(existing_bytes) + existing_manifest = RuntimeArtifactManifest.from_bytes(existing_bytes) except Exception as exc: # pragma: no cover - diagnostic path details["existing_manifest_error"] = type(exc).__name__ details["existing_manifest_error_message"] = str(exc) @@ -154,15 +155,15 @@ def _summarize_value(value: object) -> object: ) -def prepare_serving_manifest_carrier( +def prepare_runtime_artifact_manifest_carrier( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, canonical_index: CanonicalIndex, representation_contract_hash: str | None = None, logical_topology_json: str | None = None, serving_manifest_ref: str | None = None, topology_admission_digest: str | None = None, -) -> ServingManifestCarrier: +) -> RuntimeArtifactManifestCarrier: normalized_logical_topology = _normalize_logical_topology_payload( logical_topology_json ) @@ -177,14 +178,14 @@ def prepare_serving_manifest_carrier( helper_name="serving manifest carrier", ) ) - manifest = ServingArtifactManifest.from_build_intent( + manifest = RuntimeArtifactManifest.from_build_intent( intent=build_intent, representation_contract_hash=resolved_representation_contract_hash, - tensor_schema_hash=compute_serving_tensor_schema_hash( + tensor_schema_hash=compute_runtime_artifact_tensor_schema_hash( canonical_index, manifest_tensor_name=manifest_tensor_name, ), - canonical_tensor_count=count_canonical_serving_tensors( + canonical_tensor_count=count_canonical_runtime_tensors( canonical_index, manifest_tensor_name=manifest_tensor_name, ), @@ -198,7 +199,7 @@ def prepare_serving_manifest_carrier( ), topology_admission_digest=topology_admission_digest, ) - return ServingManifestCarrier( + return 
RuntimeArtifactManifestCarrier( serving_manifest_ref=resolved_manifest_ref, manifest_tensor_name=manifest_tensor_name, serving_manifest=manifest, @@ -206,32 +207,6 @@ def prepare_serving_manifest_carrier( ) -def _multibase_multihash_sha256(digest: bytes) -> str: - if len(digest) != 32: - raise ValueError("SHA256 digest must be 32 bytes") - import base64 - - multihash = b"\x12\x20" + digest - encoded = base64.b32encode(multihash).decode("ascii").lower().rstrip("=") - return f"b{encoded}" - - -def _hash_payload_to_multihash(payload: object) -> str: - import hashlib - - return _multibase_multihash_sha256( - hashlib.sha256(_canonical_json_bytes(payload)).digest() - ) - - -def _hash_versioned_payload_to_multihash(version: str, payload: object) -> str: - import hashlib - - serialized = _canonical_json_bytes(payload) - versioned_payload = version.encode("utf-8") + b"\n" + serialized - return _multibase_multihash_sha256(hashlib.sha256(versioned_payload).digest()) - - def _dtype_to_string(dtype: torch.dtype) -> str: return str(dtype) @@ -257,12 +232,12 @@ def _normalize_contract_family( return cast(AssemblyContractFamily, normalized) -def _coerce_serving_support_level( - value: ServingSupportLevel | str, -) -> ServingSupportLevel: - if isinstance(value, ServingSupportLevel): +def _coerce_runtime_support_level( + value: RuntimeSupportLevel | str, +) -> RuntimeSupportLevel: + if isinstance(value, RuntimeSupportLevel): return value - return ServingSupportLevel(str(value).strip()) + return RuntimeSupportLevel(str(value).strip()) def _resolve_manifest_tensor_name( @@ -293,7 +268,7 @@ def _resolve_manifest_tensor_name( def _resolve_explicit_representation_contract_hash( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, representation_contract_hash: str | None, helper_name: str, ) -> str: @@ -303,14 +278,14 @@ def _resolve_explicit_representation_contract_hash( if not resolved_representation_contract_hash: raise ArtifactError( f"{helper_name} 
requires an explicit representation_contract_hash in " - "the argument or ServingBuildIntent", + "the argument or RuntimeArtifactBuildIntent", status_code="FAILED_PRECONDITION", retryable=False, ) return resolved_representation_contract_hash -def _canonical_serving_entries( +def _canonical_runtime_entries( canonical_index: CanonicalIndex, *, manifest_tensor_name: str, @@ -330,7 +305,7 @@ def _repack_canonical_index( repacked_entries: list[CanonicalIndexEntry] = [] offset = 0 for entry in sorted( - _canonical_serving_entries( + _canonical_runtime_entries( canonical_index, manifest_tensor_name=manifest_tensor_name, ), @@ -355,40 +330,28 @@ def _repack_canonical_index( ) -def compute_serving_tensor_schema_hash( +def compute_runtime_artifact_tensor_schema_hash( canonical_index: CanonicalIndex, *, manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME, ) -> str: - tensors = [ - { - "name": str(entry.name), - "dtype": _dtype_to_string(entry.dtype), - "shape": [int(dim) for dim in entry.shape], - "stride": [int(dim) for dim in entry.stride], - "element_size": int(entry.dtype.itemsize), - } - for entry in sorted( - _canonical_serving_entries( - canonical_index, - manifest_tensor_name=manifest_tensor_name, - ), - key=lambda entry: str(entry.name), - ) - ] - return _hash_versioned_payload_to_multihash( - "tensorcast.representation.tensor_schema.v1", - {"tensors": tensors}, + from tensorcast.artifact_runtime.contract import ( + compute_canonical_runtime_tensor_schema_hash, + ) + + return compute_canonical_runtime_tensor_schema_hash( + canonical_index, + manifest_tensor_name=manifest_tensor_name, ) -def count_canonical_serving_tensors( +def count_canonical_runtime_tensors( canonical_index: CanonicalIndex, *, manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME, ) -> int: return len( - _canonical_serving_entries( + _canonical_runtime_entries( canonical_index, manifest_tensor_name=manifest_tensor_name, ) @@ -424,8 +387,8 @@ def _resolve_publication_subject( | 
"StoreArtifactDescriptor" | str | None = None, - publication_subject: ServingPublicationSubject | BindingValueRef | None = None, -) -> ServingPublicationSubject: + publication_subject: RuntimePublicationSubject | BindingValueRef | None = None, +) -> RuntimePublicationSubject: if (serving_artifact is None) == (publication_subject is None): raise ArtifactError( "publication bundle requires exactly one of serving_artifact or publication_subject", @@ -434,10 +397,10 @@ def _resolve_publication_subject( ) if publication_subject is not None: if isinstance(publication_subject, BindingValueRef): - return ServingPublicationSubject(binding_value_ref=publication_subject) + return RuntimePublicationSubject(binding_value_ref=publication_subject) return publication_subject assert serving_artifact is not None - return ServingPublicationSubject( + return RuntimePublicationSubject( serving_artifact_id=_artifact_id_from_input(serving_artifact) ) @@ -544,72 +507,16 @@ def _canonical_index_from_tensors( def _normalize_logical_topology_payload( logical_topology_json: str | None, ) -> dict[str, object] | None: - if logical_topology_json is None: - return None + from tensorcast.artifact_runtime.contract import normalize_logical_topology_payload + try: - payload = json.loads(logical_topology_json) - except Exception as exc: # noqa: BLE001 + return normalize_logical_topology_payload(logical_topology_json) + except ValueError as exc: raise ArtifactError( - "logical_topology_json must be valid JSON", + str(exc), status_code="INVALID_ARGUMENT", retryable=False, ) from exc - if not isinstance(payload, dict): - raise ArtifactError( - "logical_topology_json must encode an object", - status_code="INVALID_ARGUMENT", - retryable=False, - ) - family = str(payload.get("family", "")).strip() - version = str(payload.get("version", "")).strip() - raw_dimensions = payload.get("dimensions", []) - if not family: - raise ArtifactError( - "logical_topology_json.family must not be empty", - 
status_code="INVALID_ARGUMENT", - retryable=False, - ) - if not version: - raise ArtifactError( - "logical_topology_json.version must not be empty", - status_code="INVALID_ARGUMENT", - retryable=False, - ) - if not isinstance(raw_dimensions, list): - raise ArtifactError( - "logical_topology_json.dimensions must be a list", - status_code="INVALID_ARGUMENT", - retryable=False, - ) - dimensions: list[dict[str, int | str]] = [] - for raw_dimension in raw_dimensions: - if not isinstance(raw_dimension, dict): - raise ArtifactError( - "logical_topology_json.dimensions items must be objects", - status_code="INVALID_ARGUMENT", - retryable=False, - ) - name = str(raw_dimension.get("name", "")).strip() - if not name: - raise ArtifactError( - "logical_topology_json dimensions require non-empty name", - status_code="INVALID_ARGUMENT", - retryable=False, - ) - size = raw_dimension.get("size", None) - if not isinstance(size, int) or size <= 0: - raise ArtifactError( - "logical_topology_json dimensions require positive integer size", - status_code="INVALID_ARGUMENT", - retryable=False, - ) - dimensions.append({"name": name, "size": int(size)}) - dimensions.sort(key=lambda item: (str(item["name"]), int(item["size"]))) - return { - "family": family, - "version": version, - "dimensions": dimensions, - } def _tensor_spec_payload( @@ -674,14 +581,14 @@ def compute_pure_transform_representation_contract_hash( ) source_entries = { str(entry.name): entry - for entry in _canonical_serving_entries( + for entry in _canonical_runtime_entries( source_canonical_index, manifest_tensor_name=manifest_tensor_name, ) } target_entries = { str(entry.name): entry - for entry in _canonical_serving_entries( + for entry in _canonical_runtime_entries( target_canonical_index, manifest_tensor_name=manifest_tensor_name, ) @@ -696,7 +603,7 @@ def compute_pure_transform_representation_contract_hash( retryable=False, ) - tensor_schema_hash = compute_serving_tensor_schema_hash( + tensor_schema_hash = 
compute_runtime_artifact_tensor_schema_hash( target_canonical_index, manifest_tensor_name=manifest_tensor_name, ) @@ -760,31 +667,33 @@ def compute_pure_transform_representation_contract_hash( "segments": [], }, } - return _hash_versioned_payload_to_multihash( + from tensorcast.artifact_runtime.contract import hash_versioned_payload_to_multihash + + return hash_versioned_payload_to_multihash( "tensorcast.representation.contract.v1", payload, ) -def prepare_pure_transform_serving_registration( +def prepare_pure_transform_runtime_registration( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None, tensors: Mapping[str, torch.Tensor], logical_topology_json: str | None = None, serving_manifest_ref: str | None = None, topology_admission_digest: str | None = None, -) -> PreparedServingRegistration: +) -> PreparedRuntimeArtifactRegistration: if build_intent.builder_mode is not BuilderMode.PURE_TRANSFORM: raise ArtifactError( - "prepare_pure_transform_serving_registration requires ServingBuildIntent.builder_mode=PURE_TRANSFORM", + "prepare_pure_transform_runtime_registration requires RuntimeArtifactBuildIntent.builder_mode=PURE_TRANSFORM", status_code="FAILED_PRECONDITION", retryable=False, ) prepared_tensors = {str(name): tensor for name, tensor in dict(tensors).items()} resolved_manifest_ref, manifest_tensor_name = _resolve_manifest_tensor_name( serving_manifest_ref, - helper_name="PURE_TRANSFORM serving registration", + helper_name="PURE_TRANSFORM runtime artifact registration", ) manifest_tensor = prepared_tensors.pop(manifest_tensor_name, None) base_canonical_index = _canonical_index_from_tensors(prepared_tensors) @@ -797,7 +706,7 @@ def prepare_pure_transform_serving_registration( manifest_tensor_name=manifest_tensor_name, ) ) - carrier = prepare_serving_manifest_carrier( + carrier = prepare_runtime_artifact_manifest_carrier( build_intent=build_intent, 
canonical_index=base_canonical_index, representation_contract_hash=resolved_representation_contract_hash, @@ -832,7 +741,7 @@ def prepare_pure_transform_serving_registration( ) prepared_tensors[manifest_tensor_name] = manifest_tensor final_canonical_index = _canonical_index_from_tensors(prepared_tensors) - return PreparedServingRegistration( + return PreparedRuntimeArtifactRegistration( tensors=prepared_tensors, serving_manifest_ref=carrier.serving_manifest_ref, manifest_tensor_name=carrier.manifest_tensor_name, @@ -843,23 +752,23 @@ def prepare_pure_transform_serving_registration( ) -def prepare_serving_registration( +def prepare_runtime_artifact_registration( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, tensors: Mapping[str, torch.Tensor], representation_contract_hash: str | None = None, logical_topology_json: str | None = None, serving_manifest_ref: str | None = None, topology_admission_digest: str | None = None, -) -> PreparedServingRegistration: +) -> PreparedRuntimeArtifactRegistration: prepared_tensors = {str(name): tensor for name, tensor in dict(tensors).items()} resolved_manifest_ref, manifest_tensor_name = _resolve_manifest_tensor_name( serving_manifest_ref, - helper_name="serving registration", + helper_name="runtime artifact registration", ) manifest_tensor = prepared_tensors.pop(manifest_tensor_name, None) base_canonical_index = _canonical_index_from_tensors(prepared_tensors) - carrier = prepare_serving_manifest_carrier( + carrier = prepare_runtime_artifact_manifest_carrier( build_intent=build_intent, canonical_index=base_canonical_index, representation_contract_hash=representation_contract_hash, @@ -871,7 +780,7 @@ def prepare_serving_registration( _resolve_explicit_representation_contract_hash( build_intent=build_intent, representation_contract_hash=representation_contract_hash, - helper_name="serving registration", + helper_name="runtime artifact registration", ) ) manifest_device = ( @@ -901,7 +810,7 @@ def 
prepare_serving_registration( ) prepared_tensors[manifest_tensor_name] = manifest_tensor final_canonical_index = _canonical_index_from_tensors(prepared_tensors) - return PreparedServingRegistration( + return PreparedRuntimeArtifactRegistration( tensors=prepared_tensors, serving_manifest_ref=carrier.serving_manifest_ref, manifest_tensor_name=carrier.manifest_tensor_name, @@ -912,23 +821,23 @@ def prepare_serving_registration( ) -def prepare_binding_finalize_serving_registration( +def prepare_binding_finalize_runtime_registration( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, tensors: Mapping[str, torch.Tensor], representation_contract_hash: str | None = None, logical_topology_json: str | None = None, serving_manifest_ref: str | None = None, topology_admission_digest: str | None = None, -) -> PreparedServingRegistration: +) -> PreparedRuntimeArtifactRegistration: if build_intent.builder_mode is not BuilderMode.BINDING_FINALIZE: raise ArtifactError( - "prepare_binding_finalize_serving_registration requires " - "ServingBuildIntent.builder_mode=BINDING_FINALIZE", + "prepare_binding_finalize_runtime_registration requires " + "RuntimeArtifactBuildIntent.builder_mode=BINDING_FINALIZE", status_code="FAILED_PRECONDITION", retryable=False, ) - return prepare_serving_registration( + return prepare_runtime_artifact_registration( build_intent=build_intent, tensors=tensors, representation_contract_hash=representation_contract_hash, @@ -940,13 +849,13 @@ def prepare_binding_finalize_serving_registration( def build_binding_finalize_admission_facts( *, - support_level: ServingSupportLevel | str, + support_level: RuntimeSupportLevel | str, topology_admission_digest: str | None = None, same_binding_fast_path_validated: bool, -) -> ServingAdmissionFacts: - return ServingAdmissionFacts( +) -> RuntimeAdmissionFacts: + return RuntimeAdmissionFacts( finalize_class=FinalizeClass.REPRESENTATION_CHANGING, - 
support_level=_coerce_serving_support_level(support_level), + support_level=_coerce_runtime_support_level(support_level), topology_admission_digest=topology_admission_digest, same_binding_fast_path_validated=bool(same_binding_fast_path_validated), ) @@ -954,7 +863,7 @@ def build_binding_finalize_admission_facts( def build_pure_transform_publication_spec( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, contract_family: AssemblyContractFamily | str | None = None, source_version_key: str | None = None, serving_version_key: str | None = None, @@ -964,7 +873,7 @@ def build_pure_transform_publication_spec( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] = (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> PureTransformPublicationSpec: return PureTransformPublicationSpec( build_intent=build_intent, @@ -990,7 +899,7 @@ def build_pure_transform_publication_spec( def build_pure_transform_transform_spec( *, transform_name: str, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, contract_family: AssemblyContractFamily | str | None = None, source_version_key: str | None = None, serving_version_key: str | None = None, @@ -1000,7 +909,7 @@ def build_pure_transform_transform_spec( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] 
= (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, transform_args: dict[str, str | int] | None = None, layout_hash: str | None = None, ) -> TransformSpec: @@ -1032,7 +941,7 @@ def build_pure_transform_transform_spec( def build_pure_transform_publication_bundle( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None, contract_family: AssemblyContractFamily | str | None = None, serving_artifact: RegisteredArtifact @@ -1040,7 +949,7 @@ def build_pure_transform_publication_bundle( | "StoreArtifactDescriptor" | str | None = None, - publication_subject: ServingPublicationSubject | BindingValueRef | None = None, + publication_subject: RuntimePublicationSubject | BindingValueRef | None = None, canonical_index: CanonicalIndex, source_version_key: str | None = None, serving_version_key: str | None = None, @@ -1050,11 +959,11 @@ def build_pure_transform_publication_bundle( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] 
= (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> RepresentationPublishSpec: if build_intent.builder_mode is not BuilderMode.PURE_TRANSFORM: raise ArtifactError( - "build_pure_transform_publication_bundle requires ServingBuildIntent.builder_mode=PURE_TRANSFORM", + "build_pure_transform_publication_bundle requires RuntimeArtifactBuildIntent.builder_mode=PURE_TRANSFORM", status_code="FAILED_PRECONDITION", retryable=False, ) @@ -1066,11 +975,11 @@ def build_pure_transform_publication_bundle( helper_name="PURE_TRANSFORM publication bundle", ) - tensor_schema_hash = compute_serving_tensor_schema_hash( + tensor_schema_hash = compute_runtime_artifact_tensor_schema_hash( canonical_index, manifest_tensor_name=manifest_tensor_name, ) - canonical_tensor_count = count_canonical_serving_tensors( + canonical_tensor_count = count_canonical_runtime_tensors( canonical_index, manifest_tensor_name=manifest_tensor_name, ) @@ -1093,7 +1002,7 @@ def build_pure_transform_publication_bundle( status_code="FAILED_PRECONDITION", retryable=False, ) - manifest = ServingArtifactManifest.from_build_intent( + manifest = RuntimeArtifactManifest.from_build_intent( intent=build_intent, representation_contract_hash=resolved_representation_contract_hash, tensor_schema_hash=tensor_schema_hash, @@ -1153,9 +1062,9 @@ def build_pure_transform_publication_bundle( ) -def build_serving_publication_bundle( +def build_runtime_artifact_publication_bundle( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None, contract_family: AssemblyContractFamily | str | None = None, serving_artifact: RegisteredArtifact @@ -1163,7 +1072,7 @@ def build_serving_publication_bundle( | "StoreArtifactDescriptor" | str | None = None, - publication_subject: ServingPublicationSubject | BindingValueRef | None = None, + publication_subject: 
RuntimePublicationSubject | BindingValueRef | None = None, canonical_index: CanonicalIndex, representation_contract_hash: str | None = None, source_version_key: str | None = None, @@ -1174,21 +1083,21 @@ def build_serving_publication_bundle( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] = (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> RepresentationPublishSpec: normalized_logical_topology = _normalize_logical_topology_payload( logical_topology_json ) resolved_manifest_ref, manifest_tensor_name = _resolve_manifest_tensor_name( serving_manifest_ref, - helper_name="serving publication bundle", + helper_name="runtime artifact publication bundle", ) - tensor_schema_hash = compute_serving_tensor_schema_hash( + tensor_schema_hash = compute_runtime_artifact_tensor_schema_hash( canonical_index, manifest_tensor_name=manifest_tensor_name, ) - canonical_tensor_count = count_canonical_serving_tensors( + canonical_tensor_count = count_canonical_runtime_tensors( canonical_index, manifest_tensor_name=manifest_tensor_name, ) @@ -1196,10 +1105,10 @@ def build_serving_publication_bundle( _resolve_explicit_representation_contract_hash( build_intent=build_intent, representation_contract_hash=representation_contract_hash, - helper_name="serving publication bundle", + helper_name="runtime artifact publication bundle", ) ) - manifest = ServingArtifactManifest.from_build_intent( + manifest = RuntimeArtifactManifest.from_build_intent( intent=build_intent, representation_contract_hash=resolved_representation_contract_hash, tensor_schema_hash=tensor_schema_hash, @@ -1261,10 +1170,10 @@ def build_serving_publication_bundle( def build_binding_finalize_publication_bundle( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: RegisteredArtifact | CanonicalIndex | object | None 
= None, contract_family: AssemblyContractFamily | str | None = None, - publication_subject: ServingPublicationSubject | BindingValueRef | None = None, + publication_subject: RuntimePublicationSubject | BindingValueRef | None = None, canonical_index: CanonicalIndex, representation_contract_hash: str | None = None, source_version_key: str | None = None, @@ -1275,12 +1184,12 @@ def build_binding_finalize_publication_bundle( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] = (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> RepresentationPublishSpec: if build_intent.builder_mode is not BuilderMode.BINDING_FINALIZE: raise ArtifactError( "build_binding_finalize_publication_bundle requires " - "ServingBuildIntent.builder_mode=BINDING_FINALIZE", + "RuntimeArtifactBuildIntent.builder_mode=BINDING_FINALIZE", status_code="FAILED_PRECONDITION", retryable=False, ) @@ -1297,7 +1206,7 @@ def build_binding_finalize_publication_bundle( retryable=False, ) if ( - isinstance(publication_subject, ServingPublicationSubject) + isinstance(publication_subject, RuntimePublicationSubject) and publication_subject.binding_value_ref is None ): raise ArtifactError( @@ -1305,7 +1214,7 @@ def build_binding_finalize_publication_bundle( status_code="FAILED_PRECONDITION", retryable=False, ) - return build_serving_publication_bundle( + return build_runtime_artifact_publication_bundle( build_intent=build_intent, source_artifact=source_artifact, contract_family=contract_family, @@ -1327,7 +1236,7 @@ def build_binding_finalize_publication_bundle( def build_pure_transform_publication_bundle_from_registered_artifact( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None, contract_family: AssemblyContractFamily | str | None = None, 
serving_artifact: RegisteredArtifact, @@ -1339,7 +1248,7 @@ def build_pure_transform_publication_bundle_from_registered_artifact( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] = (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> RepresentationPublishSpec: return build_pure_transform_publication_bundle( build_intent=build_intent, @@ -1359,9 +1268,9 @@ def build_pure_transform_publication_bundle_from_registered_artifact( ) -def build_serving_publication_bundle_from_registered_artifact( +def build_runtime_artifact_publication_bundle_from_registered_artifact( *, - build_intent: ServingBuildIntent, + build_intent: RuntimeArtifactBuildIntent, source_artifact: RegisteredArtifact | CanonicalIndex | object | None = None, contract_family: AssemblyContractFamily | str | None = None, serving_artifact: RegisteredArtifact, @@ -1374,9 +1283,9 @@ def build_serving_publication_bundle_from_registered_artifact( requirements: AssemblyRequirementSetRef | None = None, readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] 
= (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> RepresentationPublishSpec: - return build_serving_publication_bundle( + return build_runtime_artifact_publication_bundle( build_intent=build_intent, source_artifact=source_artifact, contract_family=contract_family, @@ -1396,22 +1305,22 @@ def build_serving_publication_bundle_from_registered_artifact( __all__ = [ - "PreparedServingRegistration", + "PreparedRuntimeArtifactRegistration", "PureTransformPublicationSpec", "RepresentationPublishSpec", - "RegisteredServingPublication", - "build_serving_publication_bundle", - "build_serving_publication_bundle_from_registered_artifact", + "RegisteredRuntimeArtifactPublication", + "build_runtime_artifact_publication_bundle", + "build_runtime_artifact_publication_bundle_from_registered_artifact", "build_binding_finalize_admission_facts", "build_binding_finalize_publication_bundle", "build_pure_transform_publication_spec", "build_pure_transform_transform_spec", "build_pure_transform_publication_bundle", "build_pure_transform_publication_bundle_from_registered_artifact", - "compute_serving_tensor_schema_hash", + "compute_runtime_artifact_tensor_schema_hash", "compute_pure_transform_representation_contract_hash", - "count_canonical_serving_tensors", - "prepare_binding_finalize_serving_registration", - "prepare_serving_registration", - "prepare_pure_transform_serving_registration", + "count_canonical_runtime_tensors", + "prepare_binding_finalize_runtime_registration", + "prepare_runtime_artifact_registration", + "prepare_pure_transform_runtime_registration", ] diff --git a/tensorcast/api/store/realization_kernel.py b/tensorcast/api/store/realization_kernel.py index 3bfba8be..2c7afb69 100644 --- a/tensorcast/api/store/realization_kernel.py +++ b/tensorcast/api/store/realization_kernel.py @@ -9,7 +9,7 @@ import time from collections.abc import Callable, Mapping, Sequence from dataclasses import asdict, 
dataclass, field, replace -from typing import Any, Literal, NoReturn +from typing import Any, Literal, NoReturn, TypedDict from tensorcast.api.store.common import canonical_index_from_bytes from tensorcast.api.store.types import ArtifactError @@ -189,6 +189,22 @@ class ResolvedArtifactSelection: diagnostics: Mapping[str, object] = field(default_factory=dict) +class SelectionReportFields(TypedDict): + view_subset_hash: str + logical_layout_hash: str + selection_hash: str + + +def selection_report_fields( + selection: ResolvedArtifactSelection, +) -> SelectionReportFields: + return { + "view_subset_hash": selection.view_subset_hash.hex(), + "logical_layout_hash": selection.logical_layout_hash.hex(), + "selection_hash": selection.selection_hash.hex(), + } + + def resolve_artifact_selection( *, artifact_id: str | None, @@ -456,7 +472,7 @@ def validate_for_target(self, target: RealizationTargetPlan) -> None: retryable=False, ) if ( - target.kind in {"binding_adopted", "caller_tensors"} + target.kind in {"binding_owned", "binding_adopted", "caller_tensors"} and not target.target_layout_digest ): raise ArtifactError( @@ -727,6 +743,8 @@ class ArtifactRealizationReport: materialize_sec: float | None = None tensor_bind_sec: float | None = None total_sec: float | None = None + runtime_attach_sec: float | None = None + runtime_finalize_sec: float | None = None source: str | None = None operation_id: str | None = None operation_backend: str | None = None @@ -742,6 +760,9 @@ class ArtifactRealizationReport: execution_commit: RealizationExecutionCommitReport | None = None execution_diagnostics: object | None = None source_bound_plan_diagnostics: object | None = None + view_subset_hash: str = "" + logical_layout_hash: str = "" + selection_hash: str = "" def validate_for_handle(self, target_kind: RealizationTargetKind) -> None: if self.target_kind != target_kind: @@ -793,6 +814,9 @@ def artifact_realization_profile_payload( "artifact_profile": report.artifact_profile, 
"authority_scope": report.authority_scope, "source_selection_digest": report.source_selection_digest, + "view_subset_hash": report.view_subset_hash, + "logical_layout_hash": report.logical_layout_hash, + "selection_hash": report.selection_hash, "target_layout_digest": report.target_layout_digest, "copy_plan_digest": report.copy_plan_digest, "operation_backend": report.operation_backend, @@ -802,6 +826,8 @@ def artifact_realization_profile_payload( "materialize_sec": report.materialize_sec, "tensor_bind_sec": report.tensor_bind_sec, "total_sec": report.total_sec, + "runtime_attach_sec": report.runtime_attach_sec, + "runtime_finalize_sec": report.runtime_finalize_sec, "envelope_backing_kind": envelope.backing_kind, "envelope_export_kind": envelope.export_kind, "envelope_projection_kind": envelope.projection_kind, @@ -1771,8 +1797,8 @@ def _target_member_runtime_profile_digest( "load_config_digest": _optional_str( _safe_attr(target_member, "load_config_digest") ), - "serving_build_digest": _optional_str( - _safe_attr(target_member, "serving_build_digest") + "runtime_build_digest": _optional_str( + _safe_attr(target_member, "runtime_build_digest") ), } if not any(payload.values()): @@ -1833,7 +1859,7 @@ def _source_selection_mode( source: object | None, ) -> str: source_kind = _literal_value(_safe_attr(source, "source_kind")) - if source_kind == "serving_artifact_set": + if source_kind == "runtime_artifact_set": source_members = _source_members_by_member_id(source) artifact_refs = { artifact_ref @@ -2029,7 +2055,7 @@ def target_set_report_for_retained_bindings( ready_member_count=sum( 1 for report in retained - if report.readiness in {"serving_local_ready", "serving_published_ready"} + if report.readiness in {"runtime_local_ready", "runtime_published_ready"} ), staged_member_count=sum(1 for report in retained if report.staged_value), total_reservation_bytes=sum(report.reservation_bytes for report in retained), @@ -2518,6 +2544,7 @@ def report_for_binding_realization( 
artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), envelope=envelope, target_plan=target_plan, strategy_plan=strategy_plan_for_execution( @@ -2597,6 +2624,7 @@ def report_for_runtime_attachment( artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), envelope=envelope, target_plan=target_plan, strategy_plan=strategy_plan_for_execution( @@ -2711,7 +2739,7 @@ def report_for_publication( copy_plan_digest=target_plan.copy_plan_digest, artifact_id=str(artifact_id), view_id="", - artifact_profile="serving_artifact", + artifact_profile="runtime_artifact", authority_scope="daemon_publication", generation_hint=None, envelope=envelope, @@ -2765,6 +2793,7 @@ def report_for_mounted_source( artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), envelope=envelope, target_plan=target_plan, representation_admission=representation_admission_for_target(target_plan), @@ -2808,6 +2837,7 @@ def report_for_target_set( artifact_profile=selection.artifact_profile, authority_scope=selection.authority_scope, generation_hint=selection.generation_hint, + **selection_report_fields(selection), envelope=envelope, target_plan=target_plan, strategy_plan=target_set_strategy_plan_for( @@ -2950,6 +2980,7 @@ def __init__( tensor_dict_value: Mapping[str, Any] | None = None, binding_value: Any | None = None, prefetch_value: Any | None = None, + attachment_value: Any | None = None, promote_fn: Callable[..., Any] | None = None, attach_fn: Callable[..., Any] | None = None, release_contract: RealizationReleaseContract | None = None, @@ -2962,6 +2993,7 @@ def __init__( self._tensor_dict_projection: TensorDictProjection | None = None self._binding_value = 
binding_value self._prefetch_value = prefetch_value + self._attachment_value = attachment_value self._promote_fn = promote_fn self._attach_fn = attach_fn self._release_contract = release_contract or release_contract_for( @@ -3007,9 +3039,20 @@ def complete(self) -> None: def attach(self, *args: object, **kwargs: object) -> Any: if self._attach_fn is None: - self._unsupported("attach") + if args or kwargs or self._attachment_value is None: + self._unsupported("attach") + return self._attachment_value + if self._attachment_value is not None and not args and not kwargs: + return self._attachment_value return self._attach_fn(*args, **kwargs) + def attachment(self) -> Any: + if self._attachment_value is not None: + return self._attachment_value + if self._attach_fn is None: + self._unsupported("attach") + return self._attach_fn() + def publish_replica(self, *args: object, **kwargs: object) -> Any: binding_value = self._binding_value publish = getattr(binding_value, "publish_replica", None) @@ -3050,7 +3093,7 @@ class ArtifactRealizationSpec: packing: str = "byte_space" capacity_bytes: int | None = None publish: bool = False - serving_runtime_policy: object | None = None + runtime_artifact_policy: object | None = None readiness: object | None = None retention: object | None = None verify_checksums: bool = True @@ -3095,7 +3138,7 @@ def binding( options: object | None = None, capacity_bytes: int | None = None, publish: bool = False, - serving_runtime_policy: object | None = None, + runtime_artifact_policy: object | None = None, ) -> "ArtifactRealizationSpec": return cls( target_kind="binding_owned", @@ -3105,7 +3148,7 @@ def binding( options=options, capacity_bytes=capacity_bytes, publish=publish, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) @classmethod @@ -3117,7 +3160,7 @@ def adopted_binding( packing: str = "byte_space", options: object | None = None, publish: bool = False, - serving_runtime_policy: object | 
None = None, + runtime_artifact_policy: object | None = None, ) -> "ArtifactRealizationSpec": return cls( target_kind="binding_adopted", @@ -3126,7 +3169,7 @@ def adopted_binding( packing=packing, options=options, publish=publish, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) @classmethod @@ -3198,6 +3241,7 @@ def model_runtime( adapter_version: str | None = None, runtime_abi_version: str | None = None, options: object | None = None, + runtime_artifact_policy: object | None = None, ) -> "ArtifactRealizationSpec": if not str(framework or "").strip(): raise ArtifactError( @@ -3214,10 +3258,11 @@ def model_runtime( member=member, adapter_version=adapter_version, runtime_abi_version=runtime_abi_version, + runtime_artifact_policy=runtime_artifact_policy, ) @classmethod - def publication( + def _publication( cls, *, target: object, diff --git a/tensorcast/api/store/runtime.py b/tensorcast/api/store/runtime.py index 0c9c5a29..31e4cd9e 100644 --- a/tensorcast/api/store/runtime.py +++ b/tensorcast/api/store/runtime.py @@ -13,6 +13,7 @@ import weakref from collections.abc import Callable from contextlib import contextmanager +from dataclasses import dataclass from typing import Iterator, Mapping from opentelemetry import trace @@ -63,17 +64,19 @@ def close(self) -> None: _log_best_effort_cleanup_failure("store_runtime.fork_handle_cleanup") +@dataclass(frozen=True, slots=True) +class ResolvedKeyMapping: + artifact_id: str | None + disk_path: str | None + generation: int | None + + +@dataclass(frozen=True, slots=True) class _KeyCacheEntry: - def __init__( - self, - *, - artifact_id: str | None, - disk_path: str | None, - expires_at: float, - ) -> None: - self.artifact_id = artifact_id - self.disk_path = disk_path - self.expires_at = expires_at + artifact_id: str | None + disk_path: str | None + generation: int | None + expires_at: float class StoreRuntimeContext: @@ -292,6 +295,7 @@ def cache_key_mapping( *, artifact_id: 
str | None, disk_path: str | None = None, + generation: int | None = None, ttl_override: float | None = None, ) -> None: if not key: @@ -304,28 +308,40 @@ def cache_key_mapping( self._key_cache[key] = _KeyCacheEntry( artifact_id=artifact_id, disk_path=disk_path, + generation=generation, expires_at=expires_at, ) - def resolve_key_mapping_cached(self, *, key: str) -> tuple[str | None, str | None]: + def resolve_key_mapping_cached(self, *, key: str) -> ResolvedKeyMapping: now = time.monotonic() with self._key_cache_lock: cached = self._key_cache.get(key) if cached and cached.expires_at > now: - return cached.artifact_id, cached.disk_path + return ResolvedKeyMapping( + artifact_id=cached.artifact_id, + disk_path=cached.disk_path, + generation=cached.generation, + ) if cached is not None: del self._key_cache[key] mapping = self.ensure_client().resolve_key_mapping(key) resolved_id = mapping.artifact_id or None resolved_path = getattr(mapping, "used_disk_path", "") or None + raw_generation = int(getattr(mapping, "generation", 0) or 0) + generation = raw_generation if raw_generation > 0 else None ttl_override = float(mapping.cache_ttl_seconds) self.cache_key_mapping( key, artifact_id=resolved_id, disk_path=resolved_path, + generation=generation, ttl_override=ttl_override, ) - return resolved_id, resolved_path + return ResolvedKeyMapping( + artifact_id=resolved_id, + disk_path=resolved_path, + generation=generation, + ) def get_artifact_index_cached(self, artifact_id: str) -> ArtifactCacheEntry | None: return self._artifact_cache.get_artifact_index_cached(artifact_id) diff --git a/tensorcast/api/store/serving_binding_reference_consumer.py b/tensorcast/api/store/runtime_realization_reference_consumer.py similarity index 77% rename from tensorcast/api/store/serving_binding_reference_consumer.py rename to tensorcast/api/store/runtime_realization_reference_consumer.py index 7fa4900e..0847bbe1 100644 --- a/tensorcast/api/store/serving_binding_reference_consumer.py +++ 
b/tensorcast/api/store/runtime_realization_reference_consumer.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. -"""Reference serving-binding consumer helpers for examples and E2E tests.""" +"""Reference runtime-realization consumer helpers for examples and E2E tests.""" from __future__ import annotations @@ -12,8 +12,9 @@ from pydantic import BaseModel, ConfigDict, model_validator from tensorcast.api.context import GroupRealization -from tensorcast.api.store.serving_binding_spec_cache import ( - ServingBindingSpecCacheRecord, +from tensorcast.api.store.realization_kernel import resolve_artifact_selection +from tensorcast.api.store.runtime_realization_spec_cache import ( + RuntimeRealizationSpecCacheRecord, read_matching_resolved_spec_cache_entry, write_resolved_spec_cache_entry, ) @@ -24,17 +25,17 @@ from tensorcast.types import ( BindingValueRef, BlobRef, - PrefetchedServingBinding, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffSet, PrefetchRetentionPolicy, - ServingBindingMemberRef, - ServingBindingResolvedLayout, - ServingBindingResolvedSpecCacheEntry, - ServingBindingSetTarget, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingTarget, - ServingTopologyRef, + RealizationTarget, + RealizationTargetSet, + RuntimeBindingMemberRef, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeRealizationSpecCacheEntry, + RuntimeTopologyRef, ) REFERENCE_RUNTIME = "tensorcast-reference" @@ -42,7 +43,7 @@ _TARGET_INDEX_BLOB = "target_index" -class ReferenceServingTensorSpec(BaseModel): +class ReferenceRuntimeTensorSpec(BaseModel): model_config = ConfigDict(frozen=True) name: str = "alpha" @@ -52,7 +53,7 @@ class ReferenceServingTensorSpec(BaseModel): stride: tuple[int, ...] 
= (1,) @model_validator(mode="after") - def _validate_spec(self) -> "ReferenceServingTensorSpec": + def _validate_spec(self) -> "ReferenceRuntimeTensorSpec": if not self.name: raise ValueError("name must not be empty") if int(self.size_bytes) <= 0: @@ -66,15 +67,15 @@ def _validate_spec(self) -> "ReferenceServingTensorSpec": return self -class ReferenceServingResolvedSpec(BaseModel): +class ReferenceRuntimeResolvedSpec(BaseModel): model_config = ConfigDict(frozen=True) - cache_entry: ServingBindingResolvedSpecCacheEntry - target: ServingBindingTarget + cache_entry: RuntimeRealizationSpecCacheEntry + target: RealizationTarget blobs: dict[str, bytes] -class ReferenceServingAcquireResult(BaseModel): +class ReferenceRuntimeAcquireResult(BaseModel): model_config = ConfigDict(frozen=True) binding_value_ref: BindingValueRef @@ -92,7 +93,7 @@ def _canonical_json_bytes(payload: object) -> bytes: def build_reference_tensor_index_bytes( - tensor: ReferenceServingTensorSpec, + tensor: ReferenceRuntimeTensorSpec, ) -> bytes: payload = { tensor.name: [ @@ -108,7 +109,7 @@ def build_reference_tensor_index_bytes( def build_reference_target_layout( - tensor: ReferenceServingTensorSpec, + tensor: ReferenceRuntimeTensorSpec, *, device_id: int = 0, ) -> store_daemon_pb2.TargetLayout: @@ -145,40 +146,40 @@ def build_reference_resolved_spec( source_artifact_id: str, artifact_selection_digest: str, device_uuid: str, - tensor: ReferenceServingTensorSpec | None = None, + tensor: ReferenceRuntimeTensorSpec | None = None, runtime: str = REFERENCE_RUNTIME, - topology: ServingTopologyRef | None = None, - member: ServingBindingMemberRef | None = None, + topology: RuntimeTopologyRef | None = None, + member: RuntimeBindingMemberRef | None = None, source_schema_hash: str = "reference-source-schema", model_config_digest: str = "reference-model-config", - serving_build_digest: str = "reference-serving-build", + runtime_build_digest: str = "reference-runtime-build", representation_contract_hash: str 
= "reference-representation-contract", binding_layout_id: str = "reference-layout-0", -) -> ReferenceServingResolvedSpec: +) -> ReferenceRuntimeResolvedSpec: if not source_artifact_id: raise ValueError("source_artifact_id is required") if not artifact_selection_digest: raise ValueError("artifact_selection_digest is required") if not device_uuid: raise ValueError("device_uuid is required") - resolved_tensor = tensor or ReferenceServingTensorSpec() - resolved_topology = topology or ServingTopologyRef( + resolved_tensor = tensor or ReferenceRuntimeTensorSpec() + resolved_topology = topology or RuntimeTopologyRef( schema_topology_digest="reference-topology" ) - resolved_member = member or ServingBindingMemberRef( + resolved_member = member or RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=1, group_id="reference-group", ) - source = ServingBindingSourceRef( + source = RuntimeBindingSourceRef( source_kind="checkpoint_artifact", artifact_selection_digest=artifact_selection_digest, source_artifact_ref=source_artifact_id, source_schema_hash=source_schema_hash, ) - source_reuse = ServingBindingSourceReuseDecision( - mode="checkpoint_to_serving", + source_reuse = RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", representation_contract_hash=representation_contract_hash, ) target_layout = build_reference_target_layout(resolved_tensor) @@ -186,7 +187,7 @@ def build_reference_resolved_spec( target_index_bytes = build_reference_tensor_index_bytes(resolved_tensor) layout_hash = _sha256_bytes(target_layout_bytes) tensor_schema_hash = _sha256_bytes(target_index_bytes) - draft_entry = ServingBindingResolvedSpecCacheEntry( + draft_entry = RuntimeRealizationSpecCacheEntry( schema_version=1, cache_key_digest="placeholder", spec_digest="placeholder", @@ -197,7 +198,7 @@ def build_reference_resolved_spec( member=resolved_member, source_schema_hash=source_schema_hash, model_config_digest=model_config_digest, - 
serving_build_digest=serving_build_digest, + runtime_build_digest=runtime_build_digest, binding_layout_id=binding_layout_id, target_layout_hash=layout_hash, tensor_schema_hash=tensor_schema_hash, @@ -212,7 +213,7 @@ def build_reference_resolved_spec( entry = entry_with_key.model_copy( update={"spec_digest": entry_with_key.computed_spec_digest()} ) - resolved_layout = ServingBindingResolvedLayout( + resolved_layout = RuntimeBindingResolvedLayout( binding_layout_id=binding_layout_id, source=source, source_reuse=source_reuse, @@ -225,7 +226,7 @@ def build_reference_resolved_spec( spec_digest=entry.spec_digest, source_schema_hash=source_schema_hash, ) - target = ServingBindingTarget( + target = RealizationTarget( runtime=runtime, device="cuda:0", device_uuid=device_uuid, @@ -233,10 +234,10 @@ def build_reference_resolved_spec( topology=resolved_topology, member=resolved_member, model_config_digest=model_config_digest, - serving_build_digest=serving_build_digest, + runtime_build_digest=runtime_build_digest, resolved_layout=resolved_layout, ) - return ReferenceServingResolvedSpec( + return ReferenceRuntimeResolvedSpec( cache_entry=entry, target=target, blobs={ @@ -249,8 +250,8 @@ def build_reference_resolved_spec( def write_reference_resolved_spec_cache_entry( cache_root: str | os.PathLike[str], *, - resolved_spec: ReferenceServingResolvedSpec, -) -> ServingBindingSpecCacheRecord: + resolved_spec: ReferenceRuntimeResolvedSpec, +) -> RuntimeRealizationSpecCacheRecord: write_resolved_spec_cache_entry( cache_root, entry=resolved_spec.cache_entry, @@ -263,11 +264,11 @@ def write_reference_resolved_spec_cache_entry( def target_from_reference_cache_record( - record: ServingBindingSpecCacheRecord, + record: RuntimeRealizationSpecCacheRecord, *, device_uuid: str, device: str = "cuda:0", -) -> ServingBindingTarget: +) -> RealizationTarget: target_layout = record.blobs.get(_TARGET_LAYOUT_BLOB) target_index = record.blobs.get(_TARGET_INDEX_BLOB) if target_layout is None: @@ -275,7 
+276,7 @@ def target_from_reference_cache_record( if target_index is None: raise ValueError("reference cache record is missing target_index blob") entry = record.entry - resolved_layout = ServingBindingResolvedLayout( + resolved_layout = RuntimeBindingResolvedLayout( binding_layout_id=entry.binding_layout_id, source=entry.source, source_reuse=entry.source_reuse, @@ -288,7 +289,7 @@ def target_from_reference_cache_record( spec_digest=entry.spec_digest, source_schema_hash=entry.source_schema_hash, ) - return ServingBindingTarget( + return RealizationTarget( runtime=entry.runtime, device=device, device_uuid=device_uuid, @@ -297,44 +298,62 @@ def target_from_reference_cache_record( member=entry.member, model_config_digest=entry.model_config_digest, load_config_digest=entry.load_config_digest, - serving_build_digest=entry.serving_build_digest, + runtime_build_digest=entry.runtime_build_digest, resolved_layout=resolved_layout, ) -def unpack_prefetched_serving_binding( +def unpack_prefetch_handoff( result_any: Any, -) -> PrefetchedServingBinding: +) -> PrefetchHandoff: proto = operation_pb2.PrefetchServingBindingResult() if not result_any.Unpack(proto): raise ValueError("operation result is not PrefetchServingBindingResult") - return PrefetchedServingBinding.from_proto(proto) + return PrefetchHandoff.from_proto(proto) -def unpack_prefetched_serving_binding_set( +def unpack_prefetch_handoff_set( result_any: Any, -) -> PrefetchedServingBindingSet: +) -> PrefetchHandoffSet: proto = operation_pb2.PrefetchServingBindingSetResult() if not result_any.Unpack(proto): raise ValueError("operation result is not PrefetchServingBindingSetResult") - return PrefetchedServingBindingSet.from_proto(proto) + return PrefetchHandoffSet.from_proto(proto) + + +def _reference_source_selection( + *, + source_artifact_id: str, + target: RealizationTarget | RealizationTargetSet, +) -> common_pb2.ArtifactSelection: + if isinstance(target, RealizationTarget): + index_bytes = 
bytes(target.resolved_layout.target_index_bytes) + else: + index_bytes = bytes(target.members[0].resolved_layout.target_index_bytes) + return resolve_artifact_selection( + artifact_id=source_artifact_id, + canonical_index_bytes=index_bytes, + ).proto def prefetch_reference_binding( client: DaemonCtl, *, source_artifact_id: str, - target: ServingBindingTarget, + target: RealizationTarget, retention_policy: PrefetchRetentionPolicy | None = None, operation_id: str | None = None, group_realization: GroupRealization | None = None, timeout_s: float = 30.0, -) -> PrefetchedServingBinding: - selection = common_pb2.ArtifactSelection(artifact_id=source_artifact_id) +) -> PrefetchHandoff: + selection = _reference_source_selection( + source_artifact_id=source_artifact_id, + target=target, + ) response = client.prefetch_serving_binding( source_selection=selection, target=target, - requested_readiness="serving_local_ready", + requested_readiness="runtime_local_ready", retention_policy=retention_policy, operation_id=operation_id, group_realization=group_realization, @@ -345,24 +364,27 @@ def prefetch_reference_binding( if response.status.HasField("error"): message = response.status.error.message or message raise RuntimeError(message) - return unpack_prefetched_serving_binding(response.status.result) + return unpack_prefetch_handoff(response.status.result) def prefetch_reference_binding_set( client: DaemonCtl, *, source_artifact_id: str, - target: ServingBindingSetTarget, + target: RealizationTargetSet, retention_policy: PrefetchRetentionPolicy | None = None, operation_id: str | None = None, group_realization: GroupRealization | None = None, timeout_s: float = 30.0, -) -> PrefetchedServingBindingSet: - selection = common_pb2.ArtifactSelection(artifact_id=source_artifact_id) +) -> PrefetchHandoffSet: + selection = _reference_source_selection( + source_artifact_id=source_artifact_id, + target=target, + ) response = client.prefetch_serving_binding( source_selection=selection, 
target=target, - requested_readiness="serving_local_ready", + requested_readiness="runtime_local_ready", retention_policy=retention_policy, operation_id=operation_id, group_realization=group_realization, @@ -373,17 +395,17 @@ def prefetch_reference_binding_set( if response.status.HasField("error"): message = response.status.error.message or message raise RuntimeError(message) - return unpack_prefetched_serving_binding_set(response.status.result) + return unpack_prefetch_handoff_set(response.status.result) def acquire_reference_binding( client: DaemonCtl, *, - prefetched: PrefetchedServingBinding, - target: ServingBindingTarget, + prefetched: PrefetchHandoff, + target: RealizationTarget, caller_pid: int | None = None, timeout_s: float = 30.0, -) -> ReferenceServingAcquireResult: +) -> ReferenceRuntimeAcquireResult: response = acquire_reference_binding_response( client, prefetched=prefetched, @@ -399,7 +421,7 @@ def acquire_reference_binding( binding_value_id=str(response.current_value.binding_value_id), seal_generation=int(response.current_value.seal_generation), ) - return ReferenceServingAcquireResult( + return ReferenceRuntimeAcquireResult( binding_value_ref=binding_value_ref, lease_token=bytes(response.mem_handle.lease_token), has_cuda_ipc_handle=response.mem_handle.HasField("cuda_ipc_handle"), @@ -410,8 +432,8 @@ def acquire_reference_binding( def acquire_reference_binding_response( client: DaemonCtl, *, - prefetched: PrefetchedServingBinding, - target: ServingBindingTarget, + prefetched: PrefetchHandoff, + target: RealizationTarget, caller_pid: int | None = None, timeout_s: float = 30.0, ) -> store_daemon_pb2.AcquireBindingValueResponse: @@ -421,7 +443,7 @@ def acquire_reference_binding_response( expected_device_uuid=prefetched.device_uuid, expected_target_layout_hash=target.resolved_layout.target_layout_hash, expected_tensor_schema_hash=target.resolved_layout.tensor_schema_hash, - expected_serving_build_digest=target.serving_build_digest, + 
expected_serving_build_digest=target.runtime_build_digest, expected_daemon_id=prefetched.daemon_id, expected_daemon_session_id=prefetched.daemon_session_id, expected_member=prefetched.member, @@ -435,7 +457,7 @@ def acquire_reference_binding_response( def release_reference_acquire( client: DaemonCtl, *, - acquire_result: ReferenceServingAcquireResult, + acquire_result: ReferenceRuntimeAcquireResult, timeout_s: float = 5.0, ) -> None: if acquire_result.lease_token: @@ -447,9 +469,9 @@ def release_reference_acquire( __all__ = [ "REFERENCE_RUNTIME", - "ReferenceServingAcquireResult", - "ReferenceServingResolvedSpec", - "ReferenceServingTensorSpec", + "ReferenceRuntimeAcquireResult", + "ReferenceRuntimeResolvedSpec", + "ReferenceRuntimeTensorSpec", "acquire_reference_binding", "acquire_reference_binding_response", "build_reference_resolved_spec", @@ -459,7 +481,7 @@ def release_reference_acquire( "prefetch_reference_binding_set", "release_reference_acquire", "target_from_reference_cache_record", - "unpack_prefetched_serving_binding", - "unpack_prefetched_serving_binding_set", + "unpack_prefetch_handoff", + "unpack_prefetch_handoff_set", "write_reference_resolved_spec_cache_entry", ] diff --git a/tensorcast/api/store/serving_binding_spec_cache.py b/tensorcast/api/store/runtime_realization_spec_cache.py similarity index 86% rename from tensorcast/api/store/serving_binding_spec_cache.py rename to tensorcast/api/store/runtime_realization_spec_cache.py index e3320921..9507aa7f 100644 --- a/tensorcast/api/store/serving_binding_spec_cache.py +++ b/tensorcast/api/store/runtime_realization_spec_cache.py @@ -1,5 +1,7 @@ # Copyright (c) 2026, TensorCast Team. 
+"""Runtime realization resolved-spec cache helpers.""" + from __future__ import annotations import contextlib @@ -16,33 +18,33 @@ from tensorcast.types import ( BlobRef, - ServingBindingResolvedSpecCacheEntry, - ServingTopologyRef, + RuntimeRealizationSpecCacheEntry, + RuntimeTopologyRef, ) -_MANIFEST_PRODUCER = "tensorcast.serving_binding_spec_cache" +_MANIFEST_PRODUCER = "tensorcast.runtime_realization_spec_cache" _MANIFEST_PRODUCER_VERSION = 1 def _validate_runtime(runtime: str) -> None: if not str(runtime).strip(): - raise ValueError("serving runtime must not be empty") + raise ValueError("runtime must not be empty") -class ServingBindingSpecCacheRecord(BaseModel): +class RuntimeRealizationSpecCacheRecord(BaseModel): model_config = ConfigDict(frozen=True) - entry: ServingBindingResolvedSpecCacheEntry + entry: RuntimeRealizationSpecCacheEntry blobs: Mapping[str, bytes] -class ServingBindingSpecCacheGroupIndex(BaseModel): +class RuntimeRealizationSpecCacheGroupIndex(BaseModel): model_config = ConfigDict(frozen=True) schema_version: int = 1 group_cache_key_digest: str runtime: str - topology: ServingTopologyRef + topology: RuntimeTopologyRef group_id: str member_cache_key_digests: Mapping[str, str] @@ -68,8 +70,8 @@ def canonical_json_bytes(payload: object) -> bytes: return json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") -def serving_binding_spec_cache_root(cache_root: str | os.PathLike[str]) -> Path: - return Path(cache_root) / "serving_binding_specs" / "v1" +def runtime_realization_spec_cache_root(cache_root: str | os.PathLike[str]) -> Path: + return Path(cache_root) / "runtime_realization_specs" / "v1" def _sha256_bytes(data: bytes) -> str: @@ -94,7 +96,7 @@ def _validate_blob_ref(name: str, blob_ref: BlobRef, data: bytes) -> None: def _validate_entry( - entry: ServingBindingResolvedSpecCacheEntry, blobs: Mapping[str, bytes] + entry: RuntimeRealizationSpecCacheEntry, blobs: Mapping[str, bytes] ) -> None: 
_validate_runtime(entry.runtime) if entry.cache_key_digest != entry.computed_cache_key_digest(): @@ -134,7 +136,7 @@ def _group_path(root: Path, group_cache_key_digest: str) -> Path: return root / "groups" / "sha256" / f"{group_cache_key_digest}.json" -def _validate_group_index(index: ServingBindingSpecCacheGroupIndex) -> None: +def _validate_group_index(index: RuntimeRealizationSpecCacheGroupIndex) -> None: if int(index.schema_version) != 1: raise ValueError("unsupported group index schema_version") if index.group_cache_key_digest != index.computed_group_cache_key_digest(): @@ -150,7 +152,7 @@ def _validate_group_index(index: ServingBindingSpecCacheGroupIndex) -> None: raise ValueError("member cache key digests must be distinct") -def _manifest_payload(entry: ServingBindingResolvedSpecCacheEntry) -> dict[str, object]: +def _manifest_payload(entry: RuntimeRealizationSpecCacheEntry) -> dict[str, object]: return { "schema_version": 1, "producer": _MANIFEST_PRODUCER, @@ -162,10 +164,10 @@ def _manifest_payload(entry: ServingBindingResolvedSpecCacheEntry) -> dict[str, def write_resolved_spec_cache_entry( cache_root: str | os.PathLike[str], *, - entry: ServingBindingResolvedSpecCacheEntry, + entry: RuntimeRealizationSpecCacheEntry, blobs: Mapping[str, bytes], ) -> None: - root = serving_binding_spec_cache_root(cache_root) + root = runtime_realization_spec_cache_root(cache_root) _validate_entry(entry, blobs) root.mkdir(parents=True, exist_ok=True) @@ -225,9 +227,9 @@ def write_resolved_spec_cache_entry( def write_resolved_spec_cache_group_index( cache_root: str | os.PathLike[str], *, - index: ServingBindingSpecCacheGroupIndex, + index: RuntimeRealizationSpecCacheGroupIndex, ) -> None: - root = serving_binding_spec_cache_root(cache_root) + root = runtime_realization_spec_cache_root(cache_root) _validate_group_index(index) root.mkdir(parents=True, exist_ok=True) @@ -263,7 +265,7 @@ def _read_json(path: Path) -> dict[str, object]: return payload -def 
_read_record_from_spec_dir(*, spec_dir: Path) -> ServingBindingSpecCacheRecord: +def _read_record_from_spec_dir(*, spec_dir: Path) -> RuntimeRealizationSpecCacheRecord: manifest = _read_json(spec_dir / "manifest.json") if manifest.get("schema_version") != 1: raise ValueError("unsupported spec manifest schema_version") @@ -274,20 +276,20 @@ def _read_record_from_spec_dir(*, spec_dir: Path) -> ServingBindingSpecCacheReco entry_payload = manifest.get("entry") if not isinstance(entry_payload, dict): raise ValueError("spec manifest missing entry") - entry = ServingBindingResolvedSpecCacheEntry.model_validate(entry_payload) + entry = RuntimeRealizationSpecCacheEntry.model_validate(entry_payload) blobs: dict[str, bytes] = {} for name, blob_ref in entry.blob_refs.items(): blob_path = spec_dir / _safe_relative_blob_path(blob_ref.path) blobs[name] = blob_path.read_bytes() _validate_entry(entry, blobs) - return ServingBindingSpecCacheRecord(entry=entry, blobs=blobs) + return RuntimeRealizationSpecCacheRecord(entry=entry, blobs=blobs) def read_resolved_spec_cache_entry( cache_root: str | os.PathLike[str], cache_key_digest: str, -) -> ServingBindingSpecCacheRecord: - root = serving_binding_spec_cache_root(cache_root) +) -> RuntimeRealizationSpecCacheRecord: + root = runtime_realization_spec_cache_root(cache_root) key_payload = _read_json(_key_path(root, cache_key_digest)) if key_payload.get("schema_version") != 1: raise ValueError("unsupported cache key schema_version") @@ -299,7 +301,7 @@ def read_resolved_spec_cache_entry( entry_payload = key_payload.get("entry") if not isinstance(entry_payload, dict): raise ValueError("cache key missing entry") - key_entry = ServingBindingResolvedSpecCacheEntry.model_validate(entry_payload) + key_entry = RuntimeRealizationSpecCacheEntry.model_validate(entry_payload) if key_entry.cache_key_digest != cache_key_digest: raise ValueError("cache key entry digest mismatch") if key_entry.spec_digest != spec_digest: @@ -313,8 +315,8 @@ def 
read_resolved_spec_cache_entry( def read_matching_resolved_spec_cache_entry( cache_root: str | os.PathLike[str], *, - expected_entry: ServingBindingResolvedSpecCacheEntry, -) -> ServingBindingSpecCacheRecord: + expected_entry: RuntimeRealizationSpecCacheEntry, +) -> RuntimeRealizationSpecCacheRecord: if expected_entry.cache_key_digest != expected_entry.computed_cache_key_digest(): raise ValueError("expected cache_key_digest does not match canonical key") if expected_entry.spec_digest != expected_entry.computed_spec_digest(): @@ -329,8 +331,8 @@ def read_matching_resolved_spec_cache_entry( def read_resolved_spec_cache_group_index( cache_root: str | os.PathLike[str], group_cache_key_digest: str, -) -> ServingBindingSpecCacheGroupIndex: - root = serving_binding_spec_cache_root(cache_root) +) -> RuntimeRealizationSpecCacheGroupIndex: + root = runtime_realization_spec_cache_root(cache_root) payload = _read_json(_group_path(root, group_cache_key_digest)) if payload.get("schema_version") != 1: raise ValueError("unsupported group index schema_version") @@ -341,7 +343,7 @@ def read_resolved_spec_cache_group_index( index_payload = payload.get("index") if not isinstance(index_payload, dict): raise ValueError("group index missing index") - index = ServingBindingSpecCacheGroupIndex.model_validate(index_payload) + index = RuntimeRealizationSpecCacheGroupIndex.model_validate(index_payload) if index.group_cache_key_digest != group_cache_key_digest: raise ValueError("group cache key digest mismatch") _validate_group_index(index) @@ -354,3 +356,16 @@ def read_resolved_spec_cache_group_index( if record.entry.topology != index.topology: raise ValueError("group index member topology mismatch") return index + + +__all__ = [ + "RuntimeRealizationSpecCacheGroupIndex", + "RuntimeRealizationSpecCacheRecord", + "canonical_json_bytes", + "read_matching_resolved_spec_cache_entry", + "read_resolved_spec_cache_entry", + "read_resolved_spec_cache_group_index", + 
"runtime_realization_spec_cache_root", + "write_resolved_spec_cache_entry", + "write_resolved_spec_cache_group_index", +] diff --git a/tensorcast/api/store/views.py b/tensorcast/api/store/views.py index 9708dd58..d875e3f2 100644 --- a/tensorcast/api/store/views.py +++ b/tensorcast/api/store/views.py @@ -116,7 +116,7 @@ def resolve_view_inputs( if isinstance(resolved, tuple): resolved_artifact_id = resolved[0] else: - resolved_artifact_id = resolved + resolved_artifact_id = resolved.artifact_id if not resolved_artifact_id: raise ArtifactError( f"Artifact key '{resolved_key}' is not mapped", diff --git a/tensorcast/artifact_runtime/__init__.py b/tensorcast/artifact_runtime/__init__.py new file mode 100644 index 00000000..86cf9b9f --- /dev/null +++ b/tensorcast/artifact_runtime/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TensorCast Team. +"""Artifact-centered model runtime implementation package.""" diff --git a/tensorcast/serving/admin.py b/tensorcast/artifact_runtime/admin.py similarity index 70% rename from tensorcast/serving/admin.py rename to tensorcast/artifact_runtime/admin.py index 72125cb9..bd4315fa 100644 --- a/tensorcast/serving/admin.py +++ b/tensorcast/artifact_runtime/admin.py @@ -1,23 +1,23 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Admin/offline serving helpers that are not runtime integration APIs.""" +"""Admin/offline runtime helpers that are not runtime integration APIs.""" from dataclasses import dataclass -from tensorcast.serving._runtime_impl.lifecycle import ( +from tensorcast.artifact_runtime.host import SourceSubjectCoordinator +from tensorcast.artifact_runtime.intent import LocalSourceBootstrap +from tensorcast.artifact_runtime.lifecycle import ( build_local_ready_prepared_artifact, ) -from tensorcast.serving.builder.publication import ( +from tensorcast.artifact_runtime.publication.context import ( RecipePublicationContext, build_binding_finalize_build_intent, build_pure_transform_build_intent, ) -from tensorcast.serving.hosts import SourceSubjectCoordinator -from tensorcast.serving.local_ready import ( +from tensorcast.artifact_runtime.recipe.local_ready import ( freeze_local_ready_binding, - prepare_local_ready_serving, prepare_same_binding_manifest_carrier, + realize_local_ready_binding_from_source, ) -from tensorcast.serving.runtime import LocalSourceBootstrap @dataclass(frozen=True) @@ -41,6 +41,6 @@ class AdminLocalSourceBootstrap(LocalSourceBootstrap): "build_local_ready_prepared_artifact", "build_pure_transform_build_intent", "freeze_local_ready_binding", - "prepare_local_ready_serving", "prepare_same_binding_manifest_carrier", + "realize_local_ready_binding_from_source", ] diff --git a/tensorcast/artifact_runtime/artifact/__init__.py b/tensorcast/artifact_runtime/artifact/__init__.py new file mode 100644 index 00000000..655cf0ca --- /dev/null +++ b/tensorcast/artifact_runtime/artifact/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TensorCast Team. 
+"""Runtime artifact manifest and resolver helpers.""" diff --git a/tensorcast/artifact_runtime/artifact/manifest.py b/tensorcast/artifact_runtime/artifact/manifest.py new file mode 100644 index 00000000..fe2fa58b --- /dev/null +++ b/tensorcast/artifact_runtime/artifact/manifest.py @@ -0,0 +1,160 @@ +# Copyright (c) 2026, TensorCast Team. +"""Runtime artifact manifest parse and validation helpers.""" + +from __future__ import annotations + +from typing import Any + +import torch + +import tensorcast as tc + +RUNTIME_ARTIFACT_SCHEMA_VERSION = int( + tc.RuntimeArtifactManifest.model_fields["schema_version"].default +) +SERVING_MANIFEST_TENSOR_NAME = tc.SERVING_MANIFEST_TENSOR_NAME + + +class _InvalidRuntimeManifestTensor(RuntimeError): + pass + + +def runtime_manifest_from_tensor_bytes( + data: bytes | bytearray, +) -> tc.RuntimeArtifactManifest: + return tc.RuntimeArtifactManifest.from_bytes(bytes(data)) + + +def _runtime_manifest_bytes_from_device( + subset: Any, + *, + device: torch.device | str, + manifest_tensor_name: str, +) -> bytes: + result = subset.tensor_dict_with_diagnostics(device=device) + try: + manifest_tensor = result.tensors[manifest_tensor_name] + if manifest_tensor.dtype != torch.uint8 or manifest_tensor.dim() != 1: + raise _InvalidRuntimeManifestTensor( + "TensorCast runtime manifest tensor must be 1D torch.uint8" + ) + return bytes(manifest_tensor.detach().cpu().tolist()) + finally: + result.release() + + +def read_runtime_artifact_manifest_tensor( + artifact: Any, + *, + artifact_ref: str, + manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME, +) -> tc.RuntimeArtifactManifest: + subset = artifact.subset([manifest_tensor_name]) + try: + manifest_bytes = _runtime_manifest_bytes_from_device( + subset, + device="cpu", + manifest_tensor_name=manifest_tensor_name, + ) + except _InvalidRuntimeManifestTensor: + raise + except Exception as cpu_exc: + try: + cuda_device = torch.device("cuda", torch.cuda.current_device()) + manifest_bytes = 
_runtime_manifest_bytes_from_device( + subset, + device=cuda_device, + manifest_tensor_name=manifest_tensor_name, + ) + except _InvalidRuntimeManifestTensor: + raise + except Exception as cuda_exc: + raise RuntimeError( + f"Failed to materialize runtime manifest from '{artifact_ref}' " + f"(cpu_error={cpu_exc!r}; cuda_error={cuda_exc!r})" + ) from cuda_exc + return runtime_manifest_from_tensor_bytes(manifest_bytes) + + +def cross_check_runtime_artifact_manifest( + *, + manifest: Any | None, + descriptor_tensor_schema_hash: str, + tensor_names: tuple[str, ...], + expected_tensor_schema_hash: str, + runtime_artifact_policy: tc.RuntimeArtifactPolicy | None = None, + expected_schema_version: int = RUNTIME_ARTIFACT_SCHEMA_VERSION, +) -> Any: + if manifest is None: + raise RuntimeError("TensorCast runtime artifact manifest is missing") + if manifest.schema_version != expected_schema_version: + raise RuntimeError( + "TensorCast runtime artifact schema version mismatch: " + f"{manifest.schema_version} != {expected_schema_version}" + ) + if manifest.artifact_kind != "serving": + raise RuntimeError( + "TensorCast runtime artifact has unsupported artifact_kind: " + f"{manifest.artifact_kind}" + ) + if ( + runtime_artifact_policy is not None + and runtime_artifact_policy.serving_manifest_ref is not None + and manifest.serving_manifest_ref + != runtime_artifact_policy.serving_manifest_ref + ): + raise RuntimeError("TensorCast runtime artifact manifest ref mismatch") + if ( + runtime_artifact_policy is not None + and runtime_artifact_policy.expected_representation_contract_hash is not None + and manifest.representation_contract_hash + != runtime_artifact_policy.expected_representation_contract_hash + ): + raise RuntimeError( + "TensorCast runtime artifact representation contract mismatch" + ) + if ( + runtime_artifact_policy is not None + and runtime_artifact_policy.expected_serving_build_digest is not None + and manifest.serving_build_digest + != 
runtime_artifact_policy.expected_serving_build_digest + ): + raise RuntimeError("TensorCast runtime artifact build digest mismatch") + if ( + runtime_artifact_policy is not None + and getattr( + runtime_artifact_policy, + "expected_topology_admission_digest", + None, + ) + is not None + and getattr(manifest, "topology_admission_digest", None) + != runtime_artifact_policy.expected_topology_admission_digest + ): + raise RuntimeError( + "TensorCast runtime artifact topology admission digest mismatch" + ) + if manifest.tensor_schema_hash != expected_tensor_schema_hash: + raise RuntimeError( + "TensorCast runtime artifact tensor schema hash mismatch: " + f"manifest={manifest.tensor_schema_hash}, " + f"expected={expected_tensor_schema_hash}" + ) + if descriptor_tensor_schema_hash != expected_tensor_schema_hash: + raise RuntimeError( + "TensorCast runtime artifact descriptor schema hash mismatch: " + f"descriptor={descriptor_tensor_schema_hash}, " + f"expected={expected_tensor_schema_hash}" + ) + if manifest.canonical_tensor_count != len(tensor_names): + raise RuntimeError("TensorCast runtime artifact tensor count mismatch") + return manifest + + +__all__ = [ + "RUNTIME_ARTIFACT_SCHEMA_VERSION", + "SERVING_MANIFEST_TENSOR_NAME", + "cross_check_runtime_artifact_manifest", + "read_runtime_artifact_manifest_tensor", + "runtime_manifest_from_tensor_bytes", +] diff --git a/tensorcast/serving/resolver.py b/tensorcast/artifact_runtime/artifact/resolver.py similarity index 76% rename from tensorcast/serving/resolver.py rename to tensorcast/artifact_runtime/artifact/resolver.py index d3bb0005..e45566fc 100644 --- a/tensorcast/serving/resolver.py +++ b/tensorcast/artifact_runtime/artifact/resolver.py @@ -1,25 +1,33 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Serving artifact resolution facade for framework integrations.""" +"""Runtime artifact resolution facade for framework integrations.""" from __future__ import annotations +import importlib +from collections.abc import Callable from dataclasses import dataclass -from typing import Any +from typing import Any, cast import torch import tensorcast as tc -import tensorcast.serving.artifact_manifest as tc_artifact_manifest -import tensorcast.serving.builder.materialization as tc_core_materialization -from tensorcast.api.store import artifact as open_artifact +import tensorcast.artifact_runtime.artifact.manifest as tc_artifact_manifest +import tensorcast.artifact_runtime.contract as tc_contract +import tensorcast.artifact_runtime.recipe.materialization as tc_core_materialization from tensorcast.api.store.types import CanonicalIndexEntry -ServingArtifactManifest = tc.ServingArtifactManifest +RuntimeArtifactManifest = tc.RuntimeArtifactManifest + + +def _default_open_artifact(artifact_ref: str) -> Any: + store_api = importlib.import_module("tensorcast.api.store") + open_fn = cast(Callable[[str], Any], store_api.artifact) + return open_fn(artifact_ref) @dataclass(frozen=True) -class ResolvedServingArtifact: +class ResolvedRuntimeArtifact: artifact: Any artifact_ref: str descriptor: Any @@ -28,7 +36,7 @@ class ResolvedServingArtifact: tensor_schema_hash: str -def is_reserved_serving_tensor_name(name: str) -> bool: +def is_reserved_runtime_tensor_name(name: str) -> bool: return name.startswith("__tensorcast_meta__.") @@ -45,7 +53,7 @@ def model_tensor_names_from_descriptor(descriptor: Any) -> tuple[str, ...]: return tuple( str(name) for name in getattr(descriptor, "tensor_names", ()) - if not is_reserved_serving_tensor_name(str(name)) + if not is_reserved_runtime_tensor_name(str(name)) ) @@ -92,7 +100,7 @@ def compute_descriptor_tensor_schema_hash( *, manifest_tensor_name: str, ) -> str: - return tc.compute_serving_tensor_schema_hash( + return 
tc_contract.compute_canonical_runtime_tensor_schema_hash( canonical_index_from_descriptor(descriptor), manifest_tensor_name=manifest_tensor_name, ) @@ -109,7 +117,7 @@ def _prepared_summary_value(summary: Any, field_name: str) -> str | None: def _cross_check_prepared_manifest_summary( *, summary: Any, - manifest: tc.ServingArtifactManifest, + manifest: tc.RuntimeArtifactManifest, ) -> None: fields = ( "serving_manifest_ref", @@ -122,26 +130,29 @@ def _cross_check_prepared_manifest_summary( actual = _prepared_summary_value(manifest, field_name) if expected is not None and actual != expected: raise RuntimeError( - "TensorCast prepared serving artifact manifest does not match " + "TensorCast prepared runtime artifact manifest does not match " f"summary field {field_name}: manifest={actual!r}, " f"summary={expected!r}" ) -class ServingArtifactResolver: - """Resolve serving artifacts and enforce manifest/schema/policy checks.""" +class RuntimeArtifactResolver: + """Resolve runtime artifacts and enforce manifest/schema/policy checks.""" def __init__( self, *, manifest_tensor_name: str, schema_version: int, + open_artifact_fn: Callable[[str], Any] | None = None, ) -> None: self._manifest_tensor_name = manifest_tensor_name self._schema_version = schema_version + self._open_artifact_fn = open_artifact_fn def open(self, artifact_ref: str) -> Any: - artifact = open_artifact(ref=str(artifact_ref)) + open_fn = self._open_artifact_fn or _default_open_artifact + artifact = open_fn(str(artifact_ref)) artifact.describe() return artifact @@ -156,21 +167,21 @@ def read_manifest( artifact: Any, *, artifact_ref: str, - ) -> ResolvedServingArtifact: + ) -> ResolvedRuntimeArtifact: descriptor = artifact.describe() tensor_names = model_tensor_names_from_descriptor(descriptor) tensor_schema_hash = self.compute_descriptor_tensor_schema_hash(descriptor) if self._manifest_tensor_name not in getattr(descriptor, "tensor_names", ()): raise RuntimeError( - f"TensorCast artifact '{artifact_ref}' 
is missing serving " + f"TensorCast artifact '{artifact_ref}' is missing runtime " "manifest tensor" ) - manifest = tc_artifact_manifest.read_serving_artifact_manifest_tensor( + manifest = tc_artifact_manifest.read_runtime_artifact_manifest_tensor( artifact, artifact_ref=artifact_ref, manifest_tensor_name=self._manifest_tensor_name, ) - return ResolvedServingArtifact( + return ResolvedRuntimeArtifact( artifact=artifact, artifact_ref=str(artifact_ref), descriptor=descriptor, @@ -179,7 +190,7 @@ def read_manifest( tensor_schema_hash=tensor_schema_hash, ) - def resolve(self, artifact_ref: str) -> ResolvedServingArtifact: + def resolve(self, artifact_ref: str) -> ResolvedRuntimeArtifact: return self.read_manifest( self.open(artifact_ref), artifact_ref=artifact_ref, @@ -188,12 +199,12 @@ def resolve(self, artifact_ref: str) -> ResolvedServingArtifact: def resolve_prepared( self, summary: Any, - ) -> ResolvedServingArtifact: + ) -> ResolvedRuntimeArtifact: artifact_ref = getattr(summary, "serving_artifact_ref", None) if artifact_ref is None: raise RuntimeError( "TensorCast local-ready summary does not reference a durable " - "serving artifact" + "runtime artifact" ) artifact_ref = str(artifact_ref) artifact = self.open(artifact_ref) @@ -202,23 +213,23 @@ def resolve_prepared( tensor_schema_hash = self.compute_descriptor_tensor_schema_hash(descriptor) if self._manifest_tensor_name not in getattr(descriptor, "tensor_names", ()): raise RuntimeError( - f"TensorCast artifact '{artifact_ref}' is missing serving " + f"TensorCast artifact '{artifact_ref}' is missing runtime " "manifest tensor" ) - manifest = tc_artifact_manifest.read_serving_artifact_manifest_tensor( + manifest = tc_artifact_manifest.read_runtime_artifact_manifest_tensor( artifact, artifact_ref=artifact_ref, manifest_tensor_name=self._manifest_tensor_name, ) _cross_check_prepared_manifest_summary(summary=summary, manifest=manifest) - tc_artifact_manifest.cross_check_serving_artifact_manifest( + 
tc_artifact_manifest.cross_check_runtime_artifact_manifest( manifest=manifest, descriptor_tensor_schema_hash=tensor_schema_hash, tensor_names=tensor_names, expected_tensor_schema_hash=str(summary.tensor_schema_hash), expected_schema_version=self._schema_version, ) - return ResolvedServingArtifact( + return ResolvedRuntimeArtifact( artifact=artifact, artifact_ref=artifact_ref, descriptor=descriptor, @@ -229,36 +240,36 @@ def resolve_prepared( def cross_check( self, - resolved: ResolvedServingArtifact, + resolved: ResolvedRuntimeArtifact, *, expected_tensor_schema_hash: str, - serving_runtime_policy: Any | None = None, - ) -> ResolvedServingArtifact: - tc_artifact_manifest.cross_check_serving_artifact_manifest( + runtime_artifact_policy: Any | None = None, + ) -> ResolvedRuntimeArtifact: + tc_artifact_manifest.cross_check_runtime_artifact_manifest( manifest=resolved.manifest, descriptor_tensor_schema_hash=resolved.tensor_schema_hash, tensor_names=resolved.tensor_names, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, expected_schema_version=self._schema_version, ) return resolved -def resolve_serving_artifact( +def resolve_runtime_artifact( artifact_ref: str, *, manifest_tensor_name: str | None = None, schema_version: int | None = None, expected_tensor_schema_hash: str | None = None, - serving_runtime_policy: Any | None = None, -) -> ResolvedServingArtifact: - resolver = ServingArtifactResolver( + runtime_artifact_policy: Any | None = None, +) -> ResolvedRuntimeArtifact: + resolver = RuntimeArtifactResolver( manifest_tensor_name=manifest_tensor_name or tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=( schema_version if schema_version is not None - else int(tc.ServingArtifactManifest.model_fields["schema_version"].default) + else int(tc.RuntimeArtifactManifest.model_fields["schema_version"].default) ), ) resolved = resolver.resolve(artifact_ref) @@ -266,19 
+277,19 @@ def resolve_serving_artifact( resolver.cross_check( resolved, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) return resolved __all__ = [ - "ResolvedServingArtifact", - "ServingArtifactManifest", - "ServingArtifactResolver", + "ResolvedRuntimeArtifact", + "RuntimeArtifactManifest", + "RuntimeArtifactResolver", "canonical_index_from_descriptor", "compute_descriptor_tensor_schema_hash", "contiguous_stride", - "is_reserved_serving_tensor_name", + "is_reserved_runtime_tensor_name", "model_tensor_names_from_descriptor", - "resolve_serving_artifact", + "resolve_runtime_artifact", ] diff --git a/tensorcast/serving/runtime_attachment.py b/tensorcast/artifact_runtime/attachment.py similarity index 98% rename from tensorcast/serving/runtime_attachment.py rename to tensorcast/artifact_runtime/attachment.py index 2cfca228..af562d12 100644 --- a/tensorcast/serving/runtime_attachment.py +++ b/tensorcast/artifact_runtime/attachment.py @@ -8,7 +8,7 @@ from dataclasses import dataclass from typing import Any -from tensorcast.serving.runtime_view import RuntimeWorkerView +from tensorcast.artifact_runtime.view import RuntimeWorkerView def _optional_text(value: Any) -> str | None: diff --git a/tensorcast/artifact_runtime/binding/__init__.py b/tensorcast/artifact_runtime/binding/__init__.py new file mode 100644 index 00000000..00a8f7a8 --- /dev/null +++ b/tensorcast/artifact_runtime/binding/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TensorCast Team. 
+"""Runtime binding execution helpers.""" diff --git a/tensorcast/serving/binding_runtime.py b/tensorcast/artifact_runtime/binding/execution.py similarity index 93% rename from tensorcast/serving/binding_runtime.py rename to tensorcast/artifact_runtime/binding/execution.py index 7163a321..96af1673 100644 --- a/tensorcast/serving/binding_runtime.py +++ b/tensorcast/artifact_runtime/binding/execution.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. -"""Serving artifact bind/swap runtime facades.""" +"""Runtime artifact bind/swap facades.""" from __future__ import annotations @@ -90,17 +90,17 @@ def build_materialization_execution_context( return options, profile_fields -def bind_serving_artifact( +def bind_runtime_artifact( *, resolved_artifact: Any, tensor_names: Sequence[str], device: Any, - serving_runtime_policy: Any | None, + runtime_artifact_policy: Any | None, options: Any | None, ) -> Any: return resolved_artifact.artifact.subset(list(tensor_names)).bind( device=device, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, options=options, ) @@ -123,12 +123,12 @@ def _binding_tensor_names(binding: Any) -> tuple[str, ...]: return tuple(str(name) for name in binding_tensors) -def swap_serving_artifact( +def swap_runtime_artifact( *, binding: Any, resolved_artifact: Any, tensor_names: Sequence[str] | None = None, - serving_runtime_policy: Any | None, + runtime_artifact_policy: Any | None, options: Any | None, ) -> Any: binding_layout_tensor_names = _binding_layout_tensor_names(binding) @@ -148,13 +148,13 @@ def swap_serving_artifact( ) return binding.swap( artifact, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, options=options, ) __all__ = [ - "bind_serving_artifact", + "bind_runtime_artifact", "build_materialization_execution_context", - "swap_serving_artifact", + "swap_runtime_artifact", ] diff --git a/tensorcast/serving/builder/binding_plan.py 
b/tensorcast/artifact_runtime/binding/plan.py similarity index 97% rename from tensorcast/serving/builder/binding_plan.py rename to tensorcast/artifact_runtime/binding/plan.py index e32465f9..28da9c0b 100644 --- a/tensorcast/serving/builder/binding_plan.py +++ b/tensorcast/artifact_runtime/binding/plan.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. -"""Framework-neutral TracePlan lowering for TensorCast serving bindings.""" +"""Framework-neutral TracePlan lowering for TensorCast runtime bindings.""" from __future__ import annotations @@ -10,7 +10,7 @@ from tensorcast.api.store import BindingRealizationEntry from tensorcast.api.store import CopyPlanEntry as StoreCopyPlanEntry from tensorcast.api.store import Range as StoreRange -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, diff --git a/tensorcast/serving/retained_binding.py b/tensorcast/artifact_runtime/binding/retained.py similarity index 58% rename from tensorcast/serving/retained_binding.py rename to tensorcast/artifact_runtime/binding/retained.py index 22f7bb35..f503e035 100644 --- a/tensorcast/serving/retained_binding.py +++ b/tensorcast/artifact_runtime/binding/retained.py @@ -1,10 +1,9 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Retained serving binding authority and acquire helpers.""" +"""Retained runtime binding authority and acquire helpers.""" from __future__ import annotations import inspect -import json import logging import os import time @@ -14,7 +13,6 @@ from typing import Any, Callable, ContextManager, Iterator import torch -from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator import tensorcast as tc from tensorcast.api.store.realization_kernel import ( @@ -22,132 +20,19 @@ envelope_for_runtime_attachment, release_contract_for, ) +from tensorcast.artifact_runtime.config import ( + RetainedBindingAcquireSettings as _RetainedBindingAcquireSettings, +) +from tensorcast.retained_realization import ( + retained_realization_claim_mode, +) +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, + RetainedRealizationExpectedDigests, +) +RetainedBindingAcquireSettings = _RetainedBindingAcquireSettings _LOGGER = logging.getLogger(__name__) -_RETAINED_BINDING_ACQUIRE_MODES = {"disabled", "external"} -_READINESS_STATES = { - "serving_reserved", - "serving_local_ready", - "serving_published_ready", -} - - -def _normalize_optional_text(value: Any) -> str | None: - if value is None: - return None - normalized = str(value).strip() - return normalized or None - - -def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: - normalized = str(value).strip().lower() - if normalized not in allowed: - raise ValueError( - f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" - ) - return normalized - - -class RetainedServingBindingExpectedDigests(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - target_layout_hash: str - tensor_schema_hash: str - serving_build_digest: str - resolved_spec_digest: str - - @field_validator( - "target_layout_hash", - "tensor_schema_hash", - "serving_build_digest", - "resolved_spec_digest", - mode="before", - ) - @classmethod - def 
_normalize_required_text(cls, value: Any) -> str: - normalized = _normalize_optional_text(value) - if normalized is None: - raise ValueError("expected digest fields must be non-empty") - return normalized - - -class RetainedServingBindingAuthority(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - group_id: str - member_ref: dict[str, Any] - daemon_id: str - daemon_session_id: str - device_uuid: str - binding_value_ref: dict[str, Any] - reservation_capability: dict[str, Any] - group_realization_acquire: dict[str, Any] | None = None - local_serving_ref: str | None = None - readiness: str - verification_state: str = "local_only" - serving_artifact_id: str | None = None - trusted_reservation_bytes: int = Field(ge=0) - expected: RetainedServingBindingExpectedDigests - - @field_validator( - "group_id", - "daemon_id", - "daemon_session_id", - "device_uuid", - mode="before", - ) - @classmethod - def _normalize_required_text(cls, value: Any) -> str: - normalized = _normalize_optional_text(value) - if normalized is None: - raise ValueError("retained binding authority text fields required") - return normalized - - @field_validator( - "local_serving_ref", - "verification_state", - "serving_artifact_id", - mode="before", - ) - @classmethod - def _normalize_optional_fields(cls, value: Any) -> Any: - return _normalize_optional_text(value) - - @field_validator("readiness", mode="before") - @classmethod - def _normalize_readiness(cls, value: Any) -> str: - return _normalize_enum( - value, - allowed=_READINESS_STATES, - field_name="retained_binding_acquire.authority.readiness", - ) - - @model_validator(mode="after") - def _validate_published_ready(self) -> RetainedServingBindingAuthority: - if self.readiness == "serving_published_ready" and not self.serving_artifact_id: - raise ValueError( - "retained_binding_acquire.authority.serving_artifact_id is required when " - "readiness='serving_published_ready'" - ) - return self - - -@dataclass(frozen=True) -class 
ParsedRetainedServingBindingAuthority: - group_id: str - local_serving_ref: str | None - binding_value_ref: tc.BindingValueRef - reservation_capability: tc.BindingReservationCapability - daemon_id: str - daemon_session_id: str - device_uuid: str - member: tc.ServingBindingMemberRef - reservation_bytes: int - expected: RetainedServingBindingExpectedDigests - readiness: str - verification_state: str - serving_artifact_id: str | None = None - group_realization_acquire: tc.GroupRealizationAcquireRef | None = None @dataclass(frozen=True) @@ -163,10 +48,10 @@ def __init__( client: Any, response: Any, runtime: Any, - authority: ParsedRetainedServingBindingAuthority, + authority: ParsedRetainedRealizationAuthority, binding_value_ref: tc.BindingValueRef, binding_layout_id: str, - member_ref: tc.ServingBindingMemberRef, + member_ref: tc.RuntimeBindingMemberRef, reservation_bytes: int, lease_token: bytes, ) -> None: @@ -221,12 +106,12 @@ class RuntimeRetainedBindingAttachmentHandle: tensors: Mapping[str, torch.Tensor] binding_layout_id: str binding_value_ref: tc.BindingValueRef - member_ref: tc.ServingBindingMemberRef + member_ref: tc.RuntimeBindingMemberRef reservation_bytes: int _state: _RetainedBindingLifecycleState @property - def authority(self) -> ParsedRetainedServingBindingAuthority: + def authority(self) -> ParsedRetainedRealizationAuthority: return self._state.authority @property @@ -261,12 +146,12 @@ class AttachedRetainedBinding: tensors: Mapping[str, torch.Tensor] binding_layout_id: str binding_value_ref: tc.BindingValueRef - member_ref: tc.ServingBindingMemberRef + member_ref: tc.RuntimeBindingMemberRef reservation_bytes: int _state: _RetainedBindingLifecycleState @property - def authority(self) -> ParsedRetainedServingBindingAuthority: + def authority(self) -> ParsedRetainedRealizationAuthority: return self._state.authority @property @@ -311,13 +196,58 @@ def __exit__(self, *_exc: object) -> None: self.close() +@dataclass +class RestoredRetainedBinding: + 
"""Restored retained binding tensors before runtime ownership transfer.""" + + _attached: AttachedRetainedBinding + _runtime_handle: RuntimeRetainedBindingAttachmentHandle | None = None + + @property + def tensors(self) -> Mapping[str, torch.Tensor]: + return self._attached.tensors + + @property + def binding_layout_id(self) -> str: + return self._attached.binding_layout_id + + @property + def binding_value_ref(self) -> tc.BindingValueRef: + return self._attached.binding_value_ref + + @property + def member_ref(self) -> tc.RuntimeBindingMemberRef: + return self._attached.member_ref + + @property + def reservation_bytes(self) -> int: + return self._attached.reservation_bytes + + @property + def authority(self) -> ParsedRetainedRealizationAuthority: + return self._attached.authority + + @property + def runtime_handle(self) -> RuntimeRetainedBindingAttachmentHandle | None: + return self._runtime_handle + + def transfer_to_runtime(self) -> RuntimeRetainedBindingAttachmentHandle: + if self._runtime_handle is None: + self._runtime_handle = self._attached.transfer_to_runtime() + return self._runtime_handle + + def close(self) -> None: + if self._runtime_handle is None: + self._attached.close() + + @dataclass(frozen=True) class BorrowedRetainedBindingLease: """Single-owner acquire lease for a retained binding value.""" - authority: ParsedRetainedServingBindingAuthority + authority: ParsedRetainedRealizationAuthority binding_value_ref: tc.BindingValueRef - member_ref: tc.ServingBindingMemberRef + member_ref: tc.RuntimeBindingMemberRef reservation_bytes: int _state: _RetainedBindingLifecycleState @@ -384,91 +314,12 @@ def close(self) -> None: self._state.release() -def _payload_to_dict(value: Any, *, field_name: str) -> dict[str, Any]: - if hasattr(value, "model_dump"): - return dict(value.model_dump(mode="python")) - if isinstance(value, Mapping): - return dict(value) - if isinstance(value, str): - try: - parsed = json.loads(value) - except json.JSONDecodeError as exc: - 
raise ValueError(f"{field_name} must be a JSON object") from exc - if not isinstance(parsed, Mapping): - raise ValueError(f"{field_name} must be a JSON object") - return dict(parsed) - raise ValueError(f"{field_name} must be a dict or JSON object") - - -def _model_validate(model_type: Any, value: Any, *, field_name: str) -> Any: - payload = _payload_to_dict(value, field_name=field_name) - try: - return model_type.model_validate(payload) - except Exception as exc: - raise ValueError( - f"{field_name} is invalid for TensorCast retained binding acquire: {exc}" - ) from exc - - -def _validate_authority_consistency( - authority: ParsedRetainedServingBindingAuthority, -) -> None: - capability = authority.reservation_capability - if capability.binding_value_ref != authority.binding_value_ref: - raise ValueError( - "retained_binding_acquire.authority.reservation_capability." - "binding_value_ref must match retained_binding_acquire.authority." - "binding_value_ref" - ) - if capability.daemon_id != authority.daemon_id: - raise ValueError( - "retained_binding_acquire.authority.reservation_capability." - "daemon_id mismatch" - ) - if capability.daemon_session_id != authority.daemon_session_id: - raise ValueError( - "retained_binding_acquire.authority.reservation_capability." - "daemon_session_id mismatch" - ) - if capability.device_uuid != authority.device_uuid: - raise ValueError( - "retained_binding_acquire.authority.reservation_capability." - "device_uuid mismatch" - ) - if capability.member != authority.member: - raise ValueError( - "retained_binding_acquire.authority.reservation_capability.member mismatch" - ) - if capability.reservation_bytes != authority.reservation_bytes: - raise ValueError( - "retained_binding_acquire.authority.reservation_capability." - "reservation_bytes must match retained_binding_acquire.authority." 
- "trusted_reservation_bytes" - ) - if authority.member.group_id is not None and authority.member.group_id != ( - authority.group_id - ): - raise ValueError( - "retained_binding_acquire.authority.member_ref.group_id must match " - "retained_binding_acquire.authority.group_id" - ) - if ( - authority.readiness == "serving_published_ready" - and not authority.serving_artifact_id - ): - raise ValueError( - "retained_binding_acquire.authority.serving_artifact_id is required " - "when retained_binding_acquire.authority.readiness=" - "'serving_published_ready'" - ) - - def _validate_authority_is_attachable( - authority: ParsedRetainedServingBindingAuthority, + authority: ParsedRetainedRealizationAuthority, ) -> None: - if authority.readiness == "serving_reserved": + if authority.readiness == "runtime_reserved": raise ValueError( - "retained_binding_acquire.authority.readiness='serving_reserved' " + "retained_binding_acquire.authority.readiness='runtime_reserved' " "is not attachable" ) group_acquire = authority.group_realization_acquire @@ -558,7 +409,7 @@ def _lease_token_from_response(response: Any) -> bytes: def _validate_acquire_response( response: Any, - authority: ParsedRetainedServingBindingAuthority, + authority: ParsedRetainedRealizationAuthority, ) -> tc.BindingValueRef: acquired_ref = _binding_value_ref_from_response( response, @@ -581,7 +432,7 @@ def _validate_acquire_response( def _acquire_retained_binding_response( client: Any, - authority: ParsedRetainedServingBindingAuthority, + authority: ParsedRetainedRealizationAuthority, *, caller_pid: int, timeout_s: float | None, @@ -592,7 +443,7 @@ def _acquire_retained_binding_response( "expected_device_uuid": authority.device_uuid, "expected_target_layout_hash": authority.expected.target_layout_hash, "expected_tensor_schema_hash": authority.expected.tensor_schema_hash, - "expected_serving_build_digest": authority.expected.serving_build_digest, + "expected_serving_build_digest": 
authority.expected.runtime_build_digest, "expected_daemon_id": authority.daemon_id, "expected_daemon_session_id": authority.daemon_session_id, "expected_member": authority.member, @@ -632,7 +483,7 @@ def _release_lease_token_after_acquire_failure( _release_lease_token(client, lease_token=lease_token) except Exception: _LOGGER.exception( - "Failed to release retained serving binding lease after acquire failure", + "Failed to release retained runtime binding lease after acquire failure", ) @@ -641,7 +492,7 @@ def acquire_local_ready_retained_binding_lease( *, local_serving_ref: str, expected_device_uuid: str, - expected_member: tc.ServingBindingMemberRef, + expected_member: tc.RuntimeBindingMemberRef, expected_tensor_schema_hash: str, expected_serving_build_digest: str, expected_target_layout_hash: str | None = None, @@ -653,7 +504,7 @@ def acquire_local_ready_retained_binding_lease( client: Any | None = None, timeout_s: float | None = None, ) -> Iterator[BorrowedRetainedBindingLease]: - """Acquire an already-retained local-ready serving binding by local ref.""" + """Acquire an already-retained local-ready runtime binding by local ref.""" if runtime is None: from tensorcast.api.store import get_runtime_context @@ -703,10 +554,10 @@ def acquire_local_ready_retained_binding_lease( or getattr(runtime, "session_id", "") or "local-session" ) - expected = RetainedServingBindingExpectedDigests( + expected = RetainedRealizationExpectedDigests( target_layout_hash=expected_target_layout_hash or "local-ready-direct", tensor_schema_hash=expected_tensor_schema_hash, - serving_build_digest=expected_serving_build_digest, + runtime_build_digest=expected_serving_build_digest, resolved_spec_digest="local-ready-direct", ) reservation_capability = tc.BindingReservationCapability( @@ -723,10 +574,10 @@ def acquire_local_ready_retained_binding_lease( scope_digest=( f"{expected.target_layout_hash}:" f"{expected.tensor_schema_hash}:" - f"{expected.serving_build_digest}" + 
f"{expected.runtime_build_digest}" ), ) - authority = ParsedRetainedServingBindingAuthority( + authority = ParsedRetainedRealizationAuthority( group_id=expected_member.group_id or "", local_serving_ref=local_serving_ref, binding_value_ref=binding_value_ref, @@ -737,7 +588,7 @@ def acquire_local_ready_retained_binding_lease( member=expected_member, reservation_bytes=reservation_bytes, expected=expected, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", serving_artifact_id=serving_artifact_id, ) @@ -766,8 +617,8 @@ def acquire_local_ready_retained_binding_lease( @contextmanager -def acquire_retained_serving_binding_lease( - authority: ParsedRetainedServingBindingAuthority, +def acquire_retained_binding_lease( + authority: ParsedRetainedRealizationAuthority, *, caller_pid: int | None = None, runtime: Any | None = None, @@ -824,139 +675,41 @@ def acquire_retained_serving_binding_lease( lease.close() -def _select_retained_serving_binding_authority_config( - config: Any, - *, - expected_member: tc.ServingBindingMemberRef | None = None, -) -> RetainedServingBindingAuthority: - acquire_config = config.retained_binding_acquire - authority_config = acquire_config.authority - if authority_config is not None: - return authority_config - - authority_configs = tuple(acquire_config.authorities) - if not authority_configs: - raise ValueError( - "TensorCast retained binding authority requires " - "retained_binding_acquire.mode='external' and " - "retained_binding_acquire.authority or " - "retained_binding_acquire.authorities" - ) - if expected_member is None: - if len(authority_configs) == 1: - return authority_configs[0] - raise ValueError( - "TensorCast retained binding authority set requires an expected " - "serving member to select the worker authority" - ) - - for index, candidate in enumerate(authority_configs): - member = _model_validate( - tc.ServingBindingMemberRef, - candidate.member_ref, - 
field_name=(f"retained_binding_acquire.authorities[{index}].member_ref"), - ) - if member == expected_member: - return candidate - raise ValueError( - "TensorCast retained binding authority set has no authority for " - f"expected member {expected_member!r}" - ) - +def retained_binding_acquire_mode(extra: Mapping[str, Any] | None) -> str: + return retained_realization_claim_mode(extra) -def parse_retained_serving_binding_authority( - extra: Mapping[str, Any] | Any, - *, - expected_member: tc.ServingBindingMemberRef | None = None, -) -> ParsedRetainedServingBindingAuthority: - from tensorcast.serving.config import ServingConfig - config = ( - extra if isinstance(extra, ServingConfig) else ServingConfig.from_mapping(extra) - ) - if config.retained_binding_acquire.mode != "external": - raise ValueError( - "TensorCast retained binding authority requires " - "retained_binding_acquire.mode='external' and " - "retained_binding_acquire.authority" +def runtime_restore_rejection_reason( + authority: ParsedRetainedRealizationAuthority, +) -> str | None: + readiness = getattr(authority, "readiness", None) + if readiness == "runtime_reserved": + return ( + "TensorCast retained acquire readiness='runtime_reserved' is not attachable" ) - authority_config = _select_retained_serving_binding_authority_config( - config, - expected_member=expected_member, - ) - - binding_value_ref = _model_validate( - tc.BindingValueRef, - authority_config.binding_value_ref, - field_name="retained_binding_acquire.authority.binding_value_ref", - ) - member = _model_validate( - tc.ServingBindingMemberRef, - authority_config.member_ref, - field_name="retained_binding_acquire.authority.member_ref", - ) - capability_payload = _payload_to_dict( - authority_config.reservation_capability, - field_name="retained_binding_acquire.authority.reservation_capability", - ) - capability_payload.setdefault( - "binding_value_ref", binding_value_ref.model_dump(mode="python") - ) - capability_payload.setdefault("member", 
member.model_dump(mode="python")) - reservation_capability = _model_validate( - tc.BindingReservationCapability, - capability_payload, - field_name="retained_binding_acquire.authority.reservation_capability", - ) - group_realization_acquire = None - if authority_config.group_realization_acquire is not None: - group_realization_acquire = _model_validate( - tc.GroupRealizationAcquireRef, - authority_config.group_realization_acquire, - field_name="retained_binding_acquire.authority.group_realization_acquire", + if readiness in { + "serving_group_prepared", + "serving_group_published_ready", + }: + return ( + "TensorCast retained acquire group readiness requires a " + "published group-realization transaction authority" ) - - authority = ParsedRetainedServingBindingAuthority( - group_id=authority_config.group_id, - local_serving_ref=authority_config.local_serving_ref, - binding_value_ref=binding_value_ref, - reservation_capability=reservation_capability, - daemon_id=authority_config.daemon_id, - daemon_session_id=authority_config.daemon_session_id, - device_uuid=authority_config.device_uuid, - member=member, - reservation_bytes=int(authority_config.trusted_reservation_bytes), - expected=authority_config.expected, - readiness=authority_config.readiness, - verification_state=authority_config.verification_state or "local_only", - serving_artifact_id=authority_config.serving_artifact_id, - group_realization_acquire=group_realization_acquire, - ) - _validate_authority_consistency(authority) - if expected_member is not None and authority.member != expected_member: - raise ValueError( - "TensorCast retained binding authority member does not match " - f"expected member: authority={authority.member!r}, " - f"expected={expected_member!r}" + if readiness == "runtime_published_ready": + return ( + "TensorCast retained acquire readiness='runtime_published_ready' " + "requires a swap-capable runtime binding handle" ) - return authority - - -def retained_binding_acquire_mode(extra: 
Mapping[str, Any] | None) -> str: - if extra is None or not isinstance(extra, Mapping): - return "disabled" - from tensorcast.serving.config import ServingConfig - - return ServingConfig.from_mapping(extra).retained_binding_acquire.mode + return None @contextmanager -def acquire_retained_serving_binding( +def acquire_retained_binding( *, - authority: ParsedRetainedServingBindingAuthority | None = None, + authority: ParsedRetainedRealizationAuthority | None = None, local_serving_ref: str | None = None, target_device: torch.device | str | None = None, - expected_member: tc.ServingBindingMemberRef | None = None, + expected_member: tc.RuntimeBindingMemberRef | None = None, expected_tensor_schema_hash: str | None = None, expected_serving_build_digest: str | None = None, expected_target_layout_hash: str | None = None, @@ -971,7 +724,7 @@ def acquire_retained_serving_binding( if authority is not None: if local_serving_ref is not None: raise ValueError( - "acquire_retained_serving_binding accepts either authority " + "acquire_retained_binding accepts either authority " "or local_serving_ref, not both" ) if expected_member is not None and authority.member != expected_member: @@ -980,7 +733,7 @@ def acquire_retained_serving_binding( "the expected runtime placement: " f"authority={authority.member}, expected={expected_member}" ) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, caller_pid=caller_pid, runtime=runtime, @@ -992,7 +745,7 @@ def acquire_retained_serving_binding( if local_serving_ref is None: raise ValueError( - "acquire_retained_serving_binding requires authority or local_serving_ref" + "acquire_retained_binding requires authority or local_serving_ref" ) if target_device is None or expected_member is None: raise ValueError( @@ -1035,114 +788,105 @@ def acquire_retained_serving_binding( yield lease -def retained_serving_binding_trusted_reservation_bytes( - load_config_or_extra: Any, +@contextmanager +def 
restore_retained_binding( *, - expected_member: tc.ServingBindingMemberRef | None = None, -) -> int: - extra = getattr( - load_config_or_extra, "model_loader_extra_config", load_config_or_extra - ) - if extra is None or not isinstance(extra, Mapping): - return 0 - if retained_binding_acquire_mode(extra) != "external": - return 0 - return parse_retained_serving_binding_authority( - extra, - expected_member=expected_member, - ).reservation_bytes + authority: ParsedRetainedRealizationAuthority | None = None, + local_serving_ref: str | None = None, + target_device: torch.device | str, + expected_member: tc.RuntimeBindingMemberRef | None = None, + expected_tensor_schema_hash: str | None = None, + expected_serving_build_digest: str | None = None, + expected_target_layout_hash: str | None = None, + expected_daemon_id: str | None = None, + expected_daemon_session_id: str | None = None, + serving_artifact_id: str | None = None, + caller_pid: int | None = None, + runtime: Any | None = None, + client: Any | None = None, + restore_fn: Any | None = None, + timeout_s: float | None = None, +) -> Iterator[RestoredRetainedBinding]: + """Acquire and restore a retained binding value for framework attach. + If the framework does not call ``transfer_to_runtime()``, the restored owner + is released automatically when the context exits. After transfer, close + ownership belongs to the returned runtime handle. 
+ """ -def retained_serving_binding_extra_from_prefetched_binding( - *, - prefetched: tc.PrefetchedServingBinding, - target: tc.ServingBindingTarget, - expected_member: tc.ServingBindingMemberRef | None = None, -) -> dict[str, Any]: - authority = _retained_serving_binding_authority_from_prefetched_binding( - prefetched=prefetched, - target=target, - expected_member=expected_member, - ) - return _retained_serving_binding_extra( + with acquire_retained_binding( authority=authority, - config_key="retained_binding_acquire", - ) - - -def _retained_serving_binding_authority_from_prefetched_binding( - *, - prefetched: tc.PrefetchedServingBinding, - target: tc.ServingBindingTarget, - expected_member: tc.ServingBindingMemberRef | None = None, -) -> dict[str, Any]: - member = prefetched.member - if expected_member is not None and member != expected_member: - raise ValueError( - "Prefetched serving binding member does not match expected " - f"placement: prefetched={member}, expected={expected_member}" - ) - authority: dict[str, Any] = { - "group_id": member.group_id or "", - "member_ref": _model_dump(member), - "daemon_id": prefetched.daemon_id, - "daemon_session_id": prefetched.daemon_session_id, - "device_uuid": prefetched.device_uuid, - "binding_value_ref": _model_dump(prefetched.binding_value_ref), - "reservation_capability": _model_dump(prefetched.reservation_capability), - "local_serving_ref": prefetched.local_serving_ref, - "readiness": str(getattr(prefetched.readiness, "value", prefetched.readiness)), - "verification_state": str( - getattr( - prefetched.verification_state, - "value", - prefetched.verification_state, - ) - ), - "serving_artifact_id": prefetched.serving_artifact_id, - "trusted_reservation_bytes": prefetched.reservation_bytes, - "expected": { - "target_layout_hash": target.resolved_layout.target_layout_hash, - "tensor_schema_hash": target.resolved_layout.tensor_schema_hash, - "serving_build_digest": target.serving_build_digest, - "resolved_spec_digest": 
target.resolved_layout.spec_digest, - }, - } - if prefetched.group_realization_acquire is not None: - authority["group_realization_acquire"] = _model_dump( - prefetched.group_realization_acquire + local_serving_ref=local_serving_ref, + target_device=target_device, + expected_member=expected_member, + expected_tensor_schema_hash=expected_tensor_schema_hash, + expected_serving_build_digest=expected_serving_build_digest, + expected_target_layout_hash=expected_target_layout_hash, + expected_daemon_id=expected_daemon_id, + expected_daemon_session_id=expected_daemon_session_id, + serving_artifact_id=serving_artifact_id, + caller_pid=caller_pid if caller_pid is not None else os.getpid(), + runtime=runtime, + client=client, + timeout_s=timeout_s, + ) as lease: + attached = lease.restore( + target_device=torch.device(target_device), + restore_fn=restore_fn, ) - return authority + restored = RestoredRetainedBinding(attached) + try: + yield restored + finally: + restored.close() -def _retained_serving_binding_extra( +@contextmanager +def restore_prepared_local_ready_binding( *, - authority: dict[str, Any], - config_key: str, -) -> dict[str, Any]: - return { - config_key: { - "mode": "external", - "authority": authority, - }, - } - + resolved_artifact: Any, + target_device: torch.device | str, + expected_member: tc.RuntimeBindingMemberRef, + expected_tensor_schema_hash: str, + expected_serving_build_digest: str | None = None, + caller_pid: int | None = None, + timeout_s: float | None = None, + runtime: Any | None = None, + client: Any | None = None, + restore_fn: Any | None = None, +) -> Iterator[RestoredRetainedBinding]: + """Restore a local-ready retained value referenced by a runtime manifest.""" -def retained_serving_binding_extra_json( - *, - prefetched: tc.PrefetchedServingBinding, - target: tc.ServingBindingTarget, - expected_member: tc.ServingBindingMemberRef | None = None, -) -> str: - return json.dumps( - retained_serving_binding_extra_from_prefetched_binding( - 
prefetched=prefetched, - target=target, - expected_member=expected_member, - ), - sort_keys=True, - separators=(",", ":"), + manifest = getattr(resolved_artifact, "manifest", None) + local_serving_ref = getattr(manifest, "local_serving_ref", None) + if manifest is None or not local_serving_ref: + raise RuntimeError( + "TensorCast prepared local-ready startup requires local_serving_ref " + "in the runtime artifact manifest" + ) + serving_build_digest = ( + expected_serving_build_digest + if expected_serving_build_digest is not None + else getattr(manifest, "serving_build_digest", None) ) + if not serving_build_digest: + raise RuntimeError( + "TensorCast prepared local-ready startup requires serving_build_digest" + ) + with restore_retained_binding( + local_serving_ref=str(local_serving_ref), + target_device=target_device, + expected_member=expected_member, + expected_tensor_schema_hash=expected_tensor_schema_hash, + expected_serving_build_digest=str(serving_build_digest), + serving_artifact_id=str(getattr(resolved_artifact, "artifact_ref", "")), + caller_pid=caller_pid, + timeout_s=timeout_s, + runtime=runtime, + client=client, + restore_fn=restore_fn, + ) as restored: + yield restored def promote_current_value_and_wait( @@ -1250,53 +994,3 @@ def _promotion_state_name( if mapped is not None: return str(mapped.value).strip().lower() return str(value).strip().lower() - - -def _model_dump(value: Any) -> dict[str, Any]: - if hasattr(value, "model_dump"): - return dict(value.model_dump(mode="python")) - if isinstance(value, Mapping): - return dict(value) - raise TypeError(f"Cannot serialize {type(value)!r}") - - -class RetainedBindingAcquireSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - mode: str = "disabled" - authority: RetainedServingBindingAuthority | None = None - authorities: tuple[RetainedServingBindingAuthority, ...] 
= () - - @field_validator("mode", mode="before") - @classmethod - def _normalize_mode(cls, value: Any) -> str: - if value is None: - return "disabled" - return _normalize_enum( - value, - allowed=_RETAINED_BINDING_ACQUIRE_MODES, - field_name="retained_binding_acquire.mode", - ) - - @model_validator(mode="after") - def _validate_authority(self) -> RetainedBindingAcquireSettings: - has_authority = self.authority is not None - has_authorities = bool(self.authorities) - if self.mode == "external" and not (has_authority or has_authorities): - raise ValueError( - "retained_binding_acquire.authority or " - "retained_binding_acquire.authorities is required when " - "retained_binding_acquire.mode='external'" - ) - if self.mode == "external" and has_authority and has_authorities: - raise ValueError( - "retained_binding_acquire.authority and " - "retained_binding_acquire.authorities are mutually exclusive" - ) - if self.mode != "external" and (has_authority or has_authorities): - raise ValueError( - "retained_binding_acquire.authority and " - "retained_binding_acquire.authorities are only valid when " - "retained_binding_acquire.mode='external'" - ) - return self diff --git a/tensorcast/artifact_runtime/config.py b/tensorcast/artifact_runtime/config.py new file mode 100644 index 00000000..23e40244 --- /dev/null +++ b/tensorcast/artifact_runtime/config.py @@ -0,0 +1,727 @@ +# Copyright (c) 2026, TensorCast Team. 
+ +"""Artifact runtime configuration schema and startup planning.""" + +from __future__ import annotations + +import importlib.resources +import re +from dataclasses import dataclass, field +from pathlib import Path +from threading import Lock +from typing import Any, Mapping + +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.policy import RuntimePolicy +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, + RetainedRealizationAuthority, +) + +_INIT_LOCK = Lock() +_INIT_KWARGS: dict[str, Any] | None = None +_DEFAULT_GLOBAL_STORE_ADDRESS = "127.0.0.1:50051" +DEFAULT_RUNTIME_PROFILE = "serving_single_node" + +_RUNTIME_MODES = {"auto", "connect", "create"} +_GLOBAL_STORE_MODES = {"auto", "connect", "start", "none"} +_BOOTSTRAP_MODES = {"disabled", "auto", "required"} +_COLLECTIVE_MODES = {"auto", "required", "disabled"} +_RETAINED_BINDING_ACQUIRE_MODES = {"disabled", "external"} +_REPLICA_PUBLICATION_MODES = {"disabled", "optional", "required"} +_REPLICA_PUBLICATION_TRIGGERS = {"after_vllm_ready"} +_PROFILE_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$") +_TOP_LEVEL_KEYS = { + "runtime", + "runtime_artifact", + "bootstrap", + "materialization", + "retained_binding_acquire", + "diagnostics", + "replica_publication", +} + + +def _normalize_optional_text(value: Any) -> str | None: + if value is None: + return None + normalized = str(value).strip() + return normalized or None + + +def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: + normalized = str(value).strip().lower() + if normalized not in allowed: + raise ValueError( + f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" + ) + return normalized + + +def _validate_existing_file(path: str, *, field_name: str) -> str: + candidate = Path(path).expanduser() + if not candidate.is_file(): + raise 
ValueError(f"{field_name} must point to an existing file, got: {path!r}") + return str(candidate) + + +def _default_resource_path(package: str, name: str) -> str | None: + try: + resource = importlib.resources.files(package).joinpath(name) + except (FileNotFoundError, ModuleNotFoundError): + return None + path = Path(str(resource)) + return str(path) if path.is_file() else None + + +def _normalize_profile_name(value: Any) -> str | None: + normalized = _normalize_optional_text(value) + if normalized is None: + return None + if not _PROFILE_NAME_PATTERN.fullmatch(normalized): + raise ValueError( + "runtime.profile must contain only letters, digits, '.', '_', or '-'" + ) + return normalized + + +def _profile_resource_path(profile: str, filename: str) -> str: + profile_name = _normalize_profile_name(profile) + if profile_name is None: + raise ValueError("runtime.profile must be non-empty") + try: + resource = ( + importlib.resources.files("tensorcast") + .joinpath("config") + .joinpath("profiles") + .joinpath(profile_name) + .joinpath(filename) + ) + except (FileNotFoundError, ModuleNotFoundError) as exc: + raise ValueError( + f"Unknown TensorCast runtime config profile: {profile_name!r}" + ) from exc + path = Path(str(resource)) + if not path.is_file(): + raise ValueError(f"Unknown TensorCast runtime config profile: {profile_name!r}") + return str(path) + + +@dataclass(frozen=True) +class RuntimeConfigProfile: + name: str + daemon_config_path: str + global_store_config_path: str + + +def resolve_runtime_config_profile(profile: str) -> RuntimeConfigProfile: + profile_name = _normalize_profile_name(profile) + if profile_name is None: + raise ValueError("runtime.profile must be non-empty") + return RuntimeConfigProfile( + name=profile_name, + daemon_config_path=_profile_resource_path( + profile_name, "store_daemon_config.yaml" + ), + global_store_config_path=_profile_resource_path( + profile_name, "global_store_config.yaml" + ), + ) + + +class 
RuntimeDaemonSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + address: str | None = None + config_path: str | None = None + show_logs: bool = False + + @field_validator("address", "config_path", mode="before") + @classmethod + def _normalize_optional_fields(cls, value: Any) -> Any: + return _normalize_optional_text(value) + + +class RuntimeGlobalStoreSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + mode: str = "auto" + address: str | None = None + config_path: str | None = None + + @field_validator("mode", mode="before") + @classmethod + def _normalize_mode(cls, value: Any) -> str: + if value is None: + return "auto" + return _normalize_enum( + value, + allowed=_GLOBAL_STORE_MODES, + field_name="runtime.global_store.mode", + ) + + @field_validator("address", "config_path", mode="before") + @classmethod + def _normalize_optional_fields(cls, value: Any) -> Any: + return _normalize_optional_text(value) + + def resolved_mode(self, runtime_mode: str) -> str: + if self.mode != "auto": + return self.mode + if self.address is not None: + return "connect" + if self.config_path is not None: + return "start" + if runtime_mode in {"create", "auto"}: + return "start" + return "none" + + +class RuntimeSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + profile: str | None = DEFAULT_RUNTIME_PROFILE + mode: str = "auto" + daemon: RuntimeDaemonSettings = RuntimeDaemonSettings() + global_store: RuntimeGlobalStoreSettings = RuntimeGlobalStoreSettings() + + @field_validator("profile", mode="before") + @classmethod + def _normalize_profile(cls, value: Any) -> str | None: + return _normalize_profile_name(value) + + @field_validator("mode", mode="before") + @classmethod + def _normalize_mode(cls, value: Any) -> str: + if value is None: + return "auto" + return _normalize_enum( + value, + allowed=_RUNTIME_MODES, + field_name="runtime.mode", + ) + + @staticmethod + def 
_default_daemon_config_path() -> str | None: + return _default_resource_path("tensorcast", "daemon_config.yaml") + + @staticmethod + def _default_global_store_config_path() -> str | None: + return _default_resource_path("tensorcast", "global_store_config.yaml") + + def to_init_kwargs( + self, + *, + default_daemon_config_path: str | None = None, + default_global_store_config_path: str | None = None, + ) -> dict[str, Any]: + profile = ( + resolve_runtime_config_profile(self.profile) + if self.profile is not None + else None + ) + kwargs: dict[str, Any] = { + "mode": self.mode, + "show_daemon_logs": self.daemon.show_logs, + } + if self.daemon.address is not None: + kwargs["address"] = self.daemon.address + + daemon_config_path = self.daemon.config_path + if daemon_config_path is None and self.mode in {"create", "auto"}: + daemon_config_path = ( + profile.daemon_config_path + if profile is not None + else default_daemon_config_path or self._default_daemon_config_path() + ) + if daemon_config_path is not None: + kwargs["daemon_config_path"] = _validate_existing_file( + daemon_config_path, + field_name="runtime.daemon.config_path", + ) + elif self.mode in {"create", "auto"}: + raise ValueError( + "runtime.mode requires a daemon config file for create/auto; " + "set runtime.profile or runtime.daemon.config_path" + ) + + global_store_mode = self.global_store.resolved_mode(self.mode) + if global_store_mode != "none": + kwargs["global_store_mode"] = global_store_mode + if global_store_mode == "connect": + kwargs["global_store_address"] = ( + self.global_store.address or _DEFAULT_GLOBAL_STORE_ADDRESS + ) + elif global_store_mode == "start": + global_store_config_path = self.global_store.config_path + if global_store_config_path is None: + global_store_config_path = ( + profile.global_store_config_path + if profile is not None + else default_global_store_config_path + or self._default_global_store_config_path() + ) + if global_store_config_path is not None: + 
kwargs["global_store_config_path"] = _validate_existing_file( + global_store_config_path, + field_name="runtime.global_store.config_path", + ) + else: + raise ValueError( + "runtime.global_store.mode='start' requires a Global " + "Store config file; set runtime.profile or " + "runtime.global_store.config_path" + ) + + return kwargs + + def ensure_initialized( + self, + *, + default_daemon_config_path: str | None = None, + default_global_store_config_path: str | None = None, + ) -> None: + init_kwargs = self.to_init_kwargs( + default_daemon_config_path=default_daemon_config_path, + default_global_store_config_path=default_global_store_config_path, + ) + import tensorcast as tc + + with _INIT_LOCK: + global _INIT_KWARGS + if tc.is_initialized(): + if _INIT_KWARGS is None: + raise RuntimeError( + "TensorCast runtime was already initialized outside " + "tensorcast.artifact_runtime.config.RuntimeSettings." + ) + if init_kwargs != _INIT_KWARGS: + raise RuntimeError( + "TensorCast runtime already initialized with different " + "settings. 
Existing=" + f"{_INIT_KWARGS}, requested={init_kwargs}" + ) + return + tc.init(**init_kwargs) + _INIT_KWARGS = dict(init_kwargs) + + +class RuntimeArtifactSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + artifact_locator: ArtifactLocator | None = None + policy: RuntimePolicy = RuntimePolicy() + + +class BootstrapSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + mode: str = "auto" + cache_dir: str | None = None + verify_source_checksums: bool = True + + @field_validator("mode", mode="before") + @classmethod + def _normalize_mode(cls, value: Any) -> str: + if value is None: + return "auto" + return _normalize_enum( + value, + allowed=_BOOTSTRAP_MODES, + field_name="bootstrap.mode", + ) + + @field_validator("cache_dir", mode="before") + @classmethod + def _normalize_optional_fields(cls, value: Any) -> Any: + return _normalize_optional_text(value) + + +class MaterializationSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + collective: str = "auto" + + @field_validator("collective", mode="before") + @classmethod + def _normalize_collective(cls, value: Any) -> str: + if value is None: + return "auto" + return _normalize_enum( + value, + allowed=_COLLECTIVE_MODES, + field_name="materialization.collective", + ) + + def collective_policy_value(self) -> str: + return { + "auto": "collective_first", + "required": "require_collective", + "disabled": "disable_collective", + }[self.collective] + + +class DiagnosticsSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + debug_path: str | None = None + verify_tensors: bool = False + + @field_validator("debug_path", mode="before") + @classmethod + def _normalize_debug_path(cls, value: Any) -> Any: + return _normalize_optional_text(value) + + +class ReplicaPublicationPolicy(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + mode: str = "disabled" + trigger: str = "after_vllm_ready" + 
async_publish: bool = True + timeout_s: float = 30.0 + ttl_ms: int | None = None + drain_timeout_s: float = 30.0 + + @field_validator("mode", mode="before") + @classmethod + def _normalize_mode(cls, value: Any) -> str: + if value is None: + return "disabled" + return _normalize_enum( + value, + allowed=_REPLICA_PUBLICATION_MODES, + field_name="replica_publication.mode", + ) + + @field_validator("trigger", mode="before") + @classmethod + def _normalize_trigger(cls, value: Any) -> str: + if value is None: + return "after_vllm_ready" + return _normalize_enum( + value, + allowed=_REPLICA_PUBLICATION_TRIGGERS, + field_name="replica_publication.trigger", + ) + + @field_validator("async_publish") + @classmethod + def _validate_async_publish(cls, value: bool) -> bool: + if not value: + raise ValueError("replica_publication.async_publish=false is not supported") + return value + + @field_validator("timeout_s", "drain_timeout_s") + @classmethod + def _validate_positive_timeout(cls, value: float) -> float: + normalized = float(value) + if normalized <= 0: + raise ValueError("replica_publication timeouts must be positive") + return normalized + + @field_validator("ttl_ms") + @classmethod + def _reject_ttl(cls, value: int | None) -> int | None: + if value is not None: + raise ValueError("replica_publication.ttl_ms is not supported yet") + return value + + +def _retained_authority(value: Any) -> RetainedRealizationAuthority | None: + if value is None: + return None + + if isinstance(value, RetainedRealizationAuthority): + return value + return RetainedRealizationAuthority.model_validate(value) + + +class RetainedBindingAcquireSettings(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + mode: str = "disabled" + authority: RetainedRealizationAuthority | None = None + authorities: tuple[RetainedRealizationAuthority, ...] 
= () + + @field_validator("mode", mode="before") + @classmethod + def _normalize_mode(cls, value: Any) -> str: + if value is None: + return "disabled" + return _normalize_enum( + value, + allowed=_RETAINED_BINDING_ACQUIRE_MODES, + field_name="retained_binding_acquire.mode", + ) + + @field_validator("authority", mode="before") + @classmethod + def _validate_authority_value( + cls, + value: Any, + ) -> RetainedRealizationAuthority | None: + return _retained_authority(value) + + @field_validator("authorities", mode="before") + @classmethod + def _validate_authorities_value( + cls, + value: Any, + ) -> tuple[RetainedRealizationAuthority, ...]: + if value is None: + return () + return tuple( + authority + for authority in (_retained_authority(item) for item in value) + if authority is not None + ) + + @field_validator("authorities") + @classmethod + def _validate_authorities( + cls, + value: tuple[RetainedRealizationAuthority, ...], + ) -> tuple[RetainedRealizationAuthority, ...]: + return value + + @model_validator(mode="after") + def _validate_authority(self) -> RetainedBindingAcquireSettings: + has_authority = self.authority is not None + has_authorities = bool(self.authorities) + if self.mode == "external" and not (has_authority or has_authorities): + raise ValueError( + "retained_binding_acquire.authority or " + "retained_binding_acquire.authorities is required when " + "retained_binding_acquire.mode='external'" + ) + if self.mode == "external" and has_authority and has_authorities: + raise ValueError( + "retained_binding_acquire.authority and " + "retained_binding_acquire.authorities are mutually exclusive" + ) + if self.mode != "external" and (has_authority or has_authorities): + raise ValueError( + "retained_binding_acquire.authority and " + "retained_binding_acquire.authorities are only valid when " + "retained_binding_acquire.mode='external'" + ) + return self + + +class TensorCastRuntimeConfig(BaseModel): + model_config = ConfigDict(frozen=True, 
extra="forbid") + + runtime: RuntimeSettings = RuntimeSettings() + runtime_artifact: RuntimeArtifactSettings = RuntimeArtifactSettings() + bootstrap: BootstrapSettings = BootstrapSettings() + materialization: MaterializationSettings = MaterializationSettings() + retained_binding_acquire: RetainedBindingAcquireSettings = Field( + default_factory=RetainedBindingAcquireSettings, + ) + diagnostics: DiagnosticsSettings = DiagnosticsSettings() + replica_publication: ReplicaPublicationPolicy = ReplicaPublicationPolicy() + + @classmethod + def from_mapping( + cls, + data: Mapping[str, Any] | None, + ) -> TensorCastRuntimeConfig: + payload: Mapping[str, Any] = {} if data is None else data + if not isinstance(payload, Mapping): + raise ValueError("model_loader_extra_config must be a mapping") + if "serving" in payload: + raise ValueError( + "TensorCast runtime config section 'serving' was removed; " + "use 'runtime_artifact'" + ) + unknown = {str(key) for key in payload if str(key) not in _TOP_LEVEL_KEYS} + if unknown: + raise ValueError( + "Unexpected TensorCast runtime config keys in " + "model_loader_extra_config: " + f"{sorted(unknown)}" + ) + runtime_artifact = payload.get("runtime_artifact") + if isinstance(runtime_artifact, Mapping) and "selector" in runtime_artifact: + raise ValueError( + "runtime_artifact.selector is not supported; " + "use runtime_artifact.artifact_locator" + ) + return cls.model_validate(dict(payload)) + + def to_mapping(self) -> dict[str, Any]: + return self.model_dump(mode="python") + + +class RuntimeStartPlanError(ValueError): + """Startup configuration cannot be lowered into one runtime plan.""" + + +@dataclass(frozen=True) +class RuntimeStartPlan: + """Typed artifact runtime startup intent selected before allocation.""" + + kind: str = field(init=False) + + +@dataclass(frozen=True) +class RuntimeArtifactBindStartPlan(RuntimeStartPlan): + """Bind a durable runtime artifact selected by an artifact locator.""" + + artifact_locator: 
ArtifactLocator + policy: RuntimePolicy + kind: str = field(default="artifact_bind", init=False) + + +@dataclass(frozen=True) +class RuntimeSourceBootstrapStartPlan(RuntimeStartPlan): + """Bootstrap a source artifact into a daemon-owned binding value.""" + + source_selector: Any + bootstrap_policy: BootstrapSettings + kind: str = field(default="source_bootstrap_to_binding", init=False) + + +@dataclass(frozen=True) +class RuntimeRetainedRealizationStartPlan(RuntimeStartPlan): + """Acquire a retained binding authority prepared by artifact prefetch.""" + + authority: ParsedRetainedRealizationAuthority + kind: str = field(default="retained_binding_acquire", init=False) + + +def _candidate_rejection_reasons( + *, + has_retained_authority: bool, + has_artifact_locator: bool, + has_source_selector: bool, + bootstrap_mode: str, +) -> dict[str, str]: + source_reason = ( + "bootstrap.mode is disabled" + if bootstrap_mode == "disabled" + else "source selector is unavailable" + ) + return { + "retained_binding_acquire": ( + "selected" + if has_retained_authority + else "retained_binding_acquire.mode is not external" + ), + "artifact_bind": ( + "selected" if has_artifact_locator else "runtime artifact locator missing" + ), + "source_bootstrap_to_binding": ( + "selected" + if has_source_selector and bootstrap_mode in {"auto", "required"} + else source_reason + ), + } + + +def _format_rejection_reasons(reasons: Mapping[str, str]) -> str: + return "; ".join(f"{name}: {reason}" for name, reason in reasons.items()) + + +def plan_runtime_start( + *, + config: TensorCastRuntimeConfig, + source_selector: Any | None, + expected_member: Any | None = None, +) -> RuntimeStartPlan: + """Classify artifact runtime startup into exactly one canonical start plan.""" + + retained_requested = config.retained_binding_acquire.mode == "external" + artifact_locator = config.runtime_artifact.artifact_locator + has_artifact_locator = artifact_locator is not None + bootstrap_mode = config.bootstrap.mode 
+ has_source_selector = source_selector is not None + + if retained_requested and has_artifact_locator: + raise RuntimeStartPlanError( + "TensorCast runtime config cannot request both retained binding " + "acquire and durable runtime artifact bind" + ) + if bootstrap_mode == "required" and (retained_requested or has_artifact_locator): + raise RuntimeStartPlanError( + "TensorCast bootstrap.mode='required' is mutually exclusive with " + "retained binding acquire and durable runtime artifact bind" + ) + if bootstrap_mode == "disabled" and not ( + retained_requested or has_artifact_locator + ): + raise RuntimeStartPlanError( + "TensorCast bootstrap.mode='disabled' requires retained binding " + "authority or durable runtime artifact locator" + ) + + if retained_requested: + from tensorcast.retained_realization import parse_retained_realization_authority + + return RuntimeRetainedRealizationStartPlan( + authority=parse_retained_realization_authority( + config, + expected_member=expected_member, + ) + ) + if artifact_locator is not None: + return RuntimeArtifactBindStartPlan( + artifact_locator=artifact_locator, + policy=config.runtime_artifact.policy, + ) + if bootstrap_mode in {"auto", "required"} and source_selector is not None: + return RuntimeSourceBootstrapStartPlan( + source_selector=source_selector, + bootstrap_policy=config.bootstrap, + ) + + reasons = _candidate_rejection_reasons( + has_retained_authority=retained_requested, + has_artifact_locator=has_artifact_locator, + has_source_selector=has_source_selector, + bootstrap_mode=bootstrap_mode, + ) + raise RuntimeStartPlanError( + "TensorCast runtime config did not resolve to one startup plan; " + f"rejected candidates: {_format_rejection_reasons(reasons)}" + ) + + +RuntimeArtifactLocator = ArtifactLocator +RuntimeBootstrapSettings = BootstrapSettings +RuntimeDiagnosticsSettings = DiagnosticsSettings +RuntimeMaterializationSettings = MaterializationSettings +RuntimeReplicaPublicationPolicy = 
ReplicaPublicationPolicy + + +__all__ = [ + "DEFAULT_RUNTIME_PROFILE", + "ArtifactLocator", + "BootstrapSettings", + "DiagnosticsSettings", + "MaterializationSettings", + "ReplicaPublicationPolicy", + "RetainedBindingAcquireSettings", + "RuntimeArtifactSettings", + "RuntimeArtifactBindStartPlan", + "RuntimeArtifactLocator", + "RuntimeBootstrapSettings", + "RuntimeConfigProfile", + "RuntimeDaemonSettings", + "RuntimeDiagnosticsSettings", + "RuntimeGlobalStoreSettings", + "RuntimeMaterializationSettings", + "RuntimePolicy", + "RuntimeReplicaPublicationPolicy", + "RuntimeRetainedRealizationStartPlan", + "RuntimeSettings", + "RuntimeSourceBootstrapStartPlan", + "RuntimeStartPlan", + "RuntimeStartPlanError", + "TensorCastRuntimeConfig", + "plan_runtime_start", + "resolve_runtime_config_profile", +] diff --git a/tensorcast/artifact_runtime/contract.py b/tensorcast/artifact_runtime/contract.py new file mode 100644 index 00000000..0ba45e4e --- /dev/null +++ b/tensorcast/artifact_runtime/contract.py @@ -0,0 +1,357 @@ +# Copyright (c) 2026, TensorCast Team. 
+ +"""Runtime identity, topology, and source-bound contract helpers.""" + +from __future__ import annotations + +import base64 +import hashlib +import json +from collections.abc import Callable, Mapping, Sequence +from dataclasses import dataclass +from typing import Any + +import torch + +from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry +from tensorcast.types import ( + SERVING_MANIFEST_TENSOR_NAME, + RuntimeBindingMemberRef, + RuntimeTopologyRef, + SourceBoundCapability, +) + +MIN_SOURCE_BOUND_CONTRACT_VERSION = 4 +SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 = "collective_first_v4" +REQUIRED_SOURCE_BOUND_CAPABILITIES = ( + SourceBoundCapability.FIRST_CLASS_COLLECTIVE_INGRESS, + SourceBoundCapability.TYPED_EXECUTION_DIAGNOSTICS, + SourceBoundCapability.SINGLE_MINT_BINDING_CLOSEOUT, +) + + +def _canonical_json_bytes(payload: object) -> bytes: + return json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") + + +def _multibase_multihash_sha256(digest: bytes) -> str: + if len(digest) != 32: + raise ValueError("SHA256 digest must be 32 bytes") + multihash = b"\x12\x20" + digest + encoded = base64.b32encode(multihash).decode("ascii").lower().rstrip("=") + return f"b{encoded}" + + +def hash_versioned_payload_to_multihash(version: str, payload: object) -> str: + serialized = _canonical_json_bytes(payload) + versioned_payload = version.encode("utf-8") + b"\n" + serialized + return _multibase_multihash_sha256(hashlib.sha256(versioned_payload).digest()) + + +def normalize_logical_topology_payload( + logical_topology_json: str | None, +) -> dict[str, object] | None: + if logical_topology_json is None: + return None + try: + payload = json.loads(logical_topology_json) + except Exception as exc: # noqa: BLE001 + raise ValueError("logical_topology_json must be valid JSON") from exc + if not isinstance(payload, dict): + raise ValueError("logical_topology_json must encode an object") + family = str(payload.get("family", "")).strip() 
+ version = str(payload.get("version", "")).strip() + raw_dimensions = payload.get("dimensions", []) + if not family: + raise ValueError("logical_topology_json.family must not be empty") + if not version: + raise ValueError("logical_topology_json.version must not be empty") + if not isinstance(raw_dimensions, list): + raise ValueError("logical_topology_json.dimensions must be a list") + dimensions: list[dict[str, int | str]] = [] + for raw_dimension in raw_dimensions: + if not isinstance(raw_dimension, dict): + raise ValueError("logical_topology_json.dimensions items must be objects") + name = str(raw_dimension.get("name", "")).strip() + if not name: + raise ValueError("logical_topology_json dimensions require non-empty name") + size = raw_dimension.get("size", None) + if not isinstance(size, int) or size <= 0: + raise ValueError( + "logical_topology_json dimensions require positive integer size" + ) + dimensions.append({"name": name, "size": int(size)}) + dimensions.sort(key=lambda item: (str(item["name"]), int(item["size"]))) + return { + "family": family, + "version": version, + "dimensions": dimensions, + } + + +@dataclass(frozen=True) +class RuntimeTensorSchemaEntry: + name: str + dtype: str + shape: tuple[int, ...] + stride: tuple[int, ...] + element_size: int + storage_offset: int + + +@dataclass(frozen=True) +class SourceBoundContractState: + server_config_present: bool + source_bound_contract_version: int + source_bound_capability_flags: int + source_bound_capability_names: tuple[str, ...] 
+ source_bound_contract_ready: bool + + @classmethod + def unavailable(cls) -> SourceBoundContractState: + return cls( + server_config_present=False, + source_bound_contract_version=0, + source_bound_capability_flags=0, + source_bound_capability_names=(), + source_bound_contract_ready=False, + ) + + @classmethod + def from_server_config( + cls, + server_config: Any | None, + ) -> SourceBoundContractState: + if server_config is None: + return cls.unavailable() + flags = int(getattr(server_config, "source_bound_capability_flags", 0) or 0) + version = int(getattr(server_config, "source_bound_contract_version", 0) or 0) + capability_names = tuple( + str(capability.name) + for capability in SourceBoundCapability + if flags & int(capability) + ) + contract_ready = version >= MIN_SOURCE_BOUND_CONTRACT_VERSION and all( + flags & int(capability) for capability in REQUIRED_SOURCE_BOUND_CAPABILITIES + ) + return cls( + server_config_present=True, + source_bound_contract_version=version, + source_bound_capability_flags=flags, + source_bound_capability_names=capability_names, + source_bound_contract_ready=contract_ready, + ) + + +def collect_runtime_tensor_schema( + tensors: Mapping[str, torch.Tensor], + *, + remove_duplicate: bool, +) -> tuple[RuntimeTensorSchemaEntry, ...]: + schema: list[RuntimeTensorSchemaEntry] = [] + seen_ptrs: set[int] = set() + for name, tensor in sorted(tensors.items()): + data_ptr = int(tensor.data_ptr()) + if remove_duplicate and data_ptr in seen_ptrs: + continue + seen_ptrs.add(data_ptr) + storage_offset = int(tensor.storage_offset()) + if storage_offset != 0: + raise ValueError( + "runtime tensor schema hash requires storage_offset == 0: " + f"{name} has storage_offset={storage_offset}" + ) + schema.append( + RuntimeTensorSchemaEntry( + name=str(name), + dtype=str(tensor.dtype), + shape=tuple(int(dim) for dim in tensor.shape), + stride=tuple(int(dim) for dim in tensor.stride()), + element_size=int(tensor.element_size()), + 
storage_offset=storage_offset, + ) + ) + return tuple(schema) + + +def compute_runtime_tensor_schema_hash( + schema: Sequence[RuntimeTensorSchemaEntry], +) -> str: + entries: list[CanonicalIndexEntry] = [] + segment_offset = 0 + for entry in sorted(schema, key=lambda item: item.name): + if int(entry.storage_offset) != 0: + raise ValueError( + "runtime tensor schema hash requires storage_offset == 0: " + f"{entry.name} has storage_offset={entry.storage_offset}" + ) + size_bytes = _schema_entry_size_bytes(entry) + entries.append( + CanonicalIndexEntry( + name=entry.name, + dtype=_torch_dtype_from_name(entry.dtype), + shape=entry.shape, + stride=entry.stride, + storage_offset=0, + segment_offset=segment_offset, + size_bytes=size_bytes, + ) + ) + segment_offset += size_bytes + return compute_canonical_runtime_tensor_schema_hash( + CanonicalIndex( + entries=tuple(entries), + total_size_bytes=segment_offset, + avbs_hash="", + ) + ) + + +def compute_canonical_runtime_tensor_schema_hash( + canonical_index: CanonicalIndex, + *, + manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME, +) -> str: + tensors = [ + { + "name": str(entry.name), + "dtype": str(entry.dtype), + "shape": [int(dim) for dim in entry.shape], + "stride": [int(dim) for dim in entry.stride], + "element_size": int(entry.dtype.itemsize), + } + for entry in sorted( + ( + entry + for entry in canonical_index.entries + if str(entry.name) != str(manifest_tensor_name) + ), + key=lambda entry: str(entry.name), + ) + ] + return hash_versioned_payload_to_multihash( + "tensorcast.representation.tensor_schema.v1", + {"tensors": tensors}, + ) + + +def logical_topology_json( + topology_ref: RuntimeTopologyRef, + *, + framework_payload: Mapping[str, object], +) -> str: + del topology_ref + normalized = normalize_logical_topology_payload( + json.dumps( + dict(framework_payload), + sort_keys=True, + separators=(",", ":"), + ) + ) + if normalized is None: + raise ValueError("framework_payload must define a logical 
topology") + return json.dumps(normalized, sort_keys=True, separators=(",", ":")) + + +def compute_runtime_representation_contract_hash( + *, + tensor_schema_hash: str, + topology_ref: RuntimeTopologyRef, + member_ref: RuntimeBindingMemberRef, + framework_name: str, + framework_version: str, + adapter_version: str, + serving_abi_version: str, + source_identity: Mapping[str, object], +) -> str: + if not tensor_schema_hash: + raise ValueError("tensor_schema_hash must not be empty") + payload = { + "framework": { + "name": str(framework_name), + "version": str(framework_version), + "adapter_version": str(adapter_version), + "serving_abi_version": str(serving_abi_version), + }, + "topology_ref": _stable_payload(topology_ref.model_dump(mode="python")), + "member_ref": _stable_payload(member_ref.model_dump(mode="python")), + "source_identity": _stable_payload(dict(source_identity)), + "tensor_schema_hash": str(tensor_schema_hash), + } + return hash_versioned_payload_to_multihash( + "tensorcast.representation.runtime_contract.v1", + payload, + ) + + +def read_source_bound_contract_state( + *, + store_fn: Callable[[], Any] | None = None, +) -> SourceBoundContractState: + try: + if store_fn is None: + import tensorcast as tc + + store_fn = tc.store + store = store_fn() + capabilities = store.capabilities + server_config = getattr(capabilities, "server_config", None) + except Exception: + return SourceBoundContractState.unavailable() + return SourceBoundContractState.from_server_config(server_config) + + +def source_bound_contract_profile_fields( + state: SourceBoundContractState, + path: str, +) -> dict[str, object]: + return { + "source_bound_contract_version": int(state.source_bound_contract_version), + "source_bound_capability_flags": list(state.source_bound_capability_names), + "source_bound_contract_ready": bool(state.source_bound_contract_ready), + "source_bound_contract_path": path, + } + + +def _schema_entry_size_bytes(entry: RuntimeTensorSchemaEntry) -> int: + 
elements = 1 + for dim in entry.shape: + elements *= int(dim) + return int(elements * entry.element_size) + + +def _torch_dtype_from_name(dtype_name: str) -> torch.dtype: + normalized = dtype_name.removeprefix("torch.") + dtype = getattr(torch, normalized, None) + if not isinstance(dtype, torch.dtype): + raise ValueError(f"unsupported runtime tensor dtype: {dtype_name}") + return dtype + + +def _stable_payload(value: object) -> object: + if isinstance(value, Mapping): + return { + str(key): _stable_payload(value[key]) + for key in sorted(value, key=lambda item: str(item)) + if value[key] is not None + } + if isinstance(value, (list, tuple)): + return [_stable_payload(item) for item in value] + if isinstance(value, (str, int, float, bool)) or value is None: + return value + return str(value) + + +__all__ = [ + "MIN_SOURCE_BOUND_CONTRACT_VERSION", + "REQUIRED_SOURCE_BOUND_CAPABILITIES", + "RuntimeTensorSchemaEntry", + "SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4", + "SourceBoundContractState", + "collect_runtime_tensor_schema", + "compute_runtime_representation_contract_hash", + "compute_runtime_tensor_schema_hash", + "logical_topology_json", + "read_source_bound_contract_state", + "source_bound_contract_profile_fields", +] diff --git a/tensorcast/serving/diagnostics.py b/tensorcast/artifact_runtime/diagnostics.py similarity index 94% rename from tensorcast/serving/diagnostics.py rename to tensorcast/artifact_runtime/diagnostics.py index ca7f69b2..a5f8b493 100644 --- a/tensorcast/serving/diagnostics.py +++ b/tensorcast/artifact_runtime/diagnostics.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Serving diagnostic helpers with no lifecycle authority.""" +"""Artifact runtime diagnostic helpers with no lifecycle authority.""" from __future__ import annotations @@ -59,7 +59,7 @@ def to_dict(self) -> dict[str, Any]: @dataclass(frozen=True) -class ServingRealizationReport: +class RuntimeRealizationReport: source_artifact_ref: str serving_manifest_ref: str representation_contract_hash: str @@ -88,7 +88,11 @@ def to_dict(self) -> dict[str, Any]: } def to_runtime_diagnostics(self) -> dict[str, Any]: - return {"serving_realization_report": self.to_dict()} + payload = self.to_dict() + return { + "runtime_realization_report": payload, + "serving_realization_report": payload, + } def binding_layout_tensor_count(layout: Any) -> int: @@ -165,7 +169,7 @@ def binding_layout_debug_payload( __all__ = [ "BindingValueReport", "RealizationReport", - "ServingRealizationReport", + "RuntimeRealizationReport", "SourceContractReport", "binding_layout_debug_payload", "binding_layout_profile_fields", diff --git a/tensorcast/serving/dto.py b/tensorcast/artifact_runtime/dto.py similarity index 83% rename from tensorcast/serving/dto.py rename to tensorcast/artifact_runtime/dto.py index 972ad121..e9e5b9e1 100644 --- a/tensorcast/serving/dto.py +++ b/tensorcast/artifact_runtime/dto.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Serving artifact runtime DTOs shared by framework integrations.""" +"""Runtime artifact DTOs shared by framework integrations.""" from __future__ import annotations @@ -9,13 +9,20 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator -from tensorcast.serving.policy import ServingArtifactLocator +from tensorcast.artifact_runtime.host import ( + RuntimePlacement as _RuntimePlacement, +) +from tensorcast.artifact_runtime.host import ( + RuntimeTensorView as _RuntimeTensorView, +) +from tensorcast.artifact_runtime.locator import ArtifactLocator from tensorcast.types import ( BindingValueRef, - ServingBindingMemberRef, - ServingTopologyRef, ) +RuntimeTensorView = _RuntimeTensorView +RuntimePlacement = _RuntimePlacement + def _normalize_manifest_ref_payload(data: Any) -> Any: if not isinstance(data, Mapping): @@ -44,7 +51,7 @@ def _model_dump_or_none(value: Any) -> dict[str, Any] | None: raise TypeError(f"Cannot serialize {type(value)!r} as a mapping") -class ServingBindingValue(BaseModel): +class RuntimeBindingValue(BaseModel): model_config = ConfigDict(frozen=True, extra="forbid") source_artifact_ref: str @@ -79,7 +86,7 @@ def to_dict(self) -> dict[str, Any]: } -class PreparedServingArtifact(BaseModel): +class PreparedRuntimeArtifact(BaseModel): model_config = ConfigDict(frozen=True, extra="forbid") source_artifact_ref: str @@ -88,7 +95,7 @@ class PreparedServingArtifact(BaseModel): representation_contract_hash: str serving_build_digest: str binding_value_ref: BindingValueRef | None = None - readiness: str = "serving_published_ready" + readiness: str = "runtime_published_ready" family: str tensor_schema_hash: str serving_version_key: str | None = None @@ -98,7 +105,7 @@ class PreparedServingArtifact(BaseModel): verification_job_id: str | None = None tp_rank: int = 0 tp_world_size: int = 1 - artifact_locator: ServingArtifactLocator | None = None + artifact_locator: ArtifactLocator | None = None @model_validator(mode="before") @classmethod @@ -109,8 
+116,8 @@ def _normalize_input(cls, data: Any) -> Any: def manifest_ref(self) -> str: return self.serving_manifest_ref - def to_binding_value(self) -> ServingBindingValue: - return ServingBindingValue( + def to_binding_value(self) -> RuntimeBindingValue: + return RuntimeBindingValue( source_artifact_ref=self.source_artifact_ref, binding_value_ref=self.binding_value_ref, readiness=self.readiness, @@ -142,7 +149,7 @@ def to_reload_request(self) -> dict[str, Any]: else: raise RuntimeError( "TensorCast local-ready serving result does not reference a " - "durable serving artifact and cannot be used as a reload " + "durable runtime artifact and cannot be used as a reload " "request" ) return { @@ -201,38 +208,6 @@ class FamilyReadiness(BaseModel): notes: str = "" -class RuntimeTensorView(BaseModel): - """Framework-neutral tensor identity view without live tensor payload.""" - - model_config = ConfigDict(frozen=True, extra="forbid") - - name: str - dtype: str - shape: tuple[int, ...] - stride: tuple[int, ...] 
- storage_offset: int = 0 - element_size: int | None = None - - -class ServingPlacement(BaseModel): - """Stable runtime placement identity shared with framework integrations.""" - - model_config = ConfigDict(frozen=True, extra="forbid") - - topology: ServingTopologyRef - member: ServingBindingMemberRef - framework_payload: dict[str, Any] - identity_payload: dict[str, Any] - - def stable_identity_payload(self) -> dict[str, Any]: - return { - "topology": self.topology.model_dump(mode="python"), - "member": self.member.model_dump(mode="python"), - "framework_payload": self.framework_payload, - "identity_payload": self.identity_payload, - } - - class FrameworkIntegrationContext(BaseModel): """Serializable framework identity facts used by core-owned facades.""" @@ -242,7 +217,7 @@ class FrameworkIntegrationContext(BaseModel): framework_version: str adapter_version: str serving_abi_version: str - placement: ServingPlacement | None = None + placement: RuntimePlacement | None = None source_identity: dict[str, Any] = Field(default_factory=dict) def stable_identity_payload(self) -> dict[str, Any]: diff --git a/tensorcast/serving/errors.py b/tensorcast/artifact_runtime/errors.py similarity index 67% rename from tensorcast/serving/errors.py rename to tensorcast/artifact_runtime/errors.py index c2492d99..3c0ee15a 100644 --- a/tensorcast/serving/errors.py +++ b/tensorcast/artifact_runtime/errors.py @@ -1,17 +1,17 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Structured serving runtime errors.""" +"""Structured artifact runtime errors.""" from __future__ import annotations from collections.abc import Mapping, Sequence -class TensorCastServingRuntimeError(RuntimeError): - """Base class for machine-readable serving runtime failures.""" +class TensorCastRuntimeError(RuntimeError): + """Base class for machine-readable artifact runtime failures.""" - code = "tensorcast_serving_runtime_error" - operation = "serving_runtime" + code = "tensorcast_runtime_error" + operation = "artifact_runtime" retryable = False worker_suspect = False @@ -33,25 +33,25 @@ def __init__( self.details = dict(details or {}) -class ServingIntegrationError(TensorCastServingRuntimeError): - """Base class for structured serving integration failures.""" +class ArtifactRuntimeIntegrationError(TensorCastRuntimeError): + """Base class for structured runtime integration failures.""" -class ServingIntegrationNotImplementedError(ServingIntegrationError): +class ArtifactRuntimeNotImplementedError(ArtifactRuntimeIntegrationError): """Raised when a deep core-owned lifecycle method is not implemented yet.""" code = "not_implemented" - operation = "serving_runtime" + operation = "artifact_runtime" -class ConfigConflictError(ServingIntegrationError): - """Serving config requests mutually exclusive lifecycle execution modes.""" +class ConfigConflictError(ArtifactRuntimeIntegrationError): + """Runtime config requests mutually exclusive lifecycle execution modes.""" code = "config_conflict" operation = "config_planning" -class CapabilityMissingError(ServingIntegrationError): +class CapabilityMissingError(ArtifactRuntimeIntegrationError): """Required host capability is absent for a requested lifecycle path.""" code = "capability_missing" @@ -80,57 +80,57 @@ def capability_missing( ) -class AdmissionRejectedError(ServingIntegrationError): - """Core admission rejected a serving lifecycle request.""" +class AdmissionRejectedError(ArtifactRuntimeIntegrationError): + 
"""Core admission rejected a runtime lifecycle request.""" code = "admission_rejected" operation = "admission" -class PlacementAdmissionError(ServingIntegrationError): +class PlacementAdmissionError(ArtifactRuntimeIntegrationError): """Placement identity or semantic placement proof is invalid.""" code = "placement_admission" operation = "placement_admission" -class ArtifactLocatorResolutionError(ServingIntegrationError): - """Durable serving artifact locator could not resolve to an artifact.""" +class ArtifactLocatorResolutionError(ArtifactRuntimeIntegrationError): + """Durable runtime artifact locator could not resolve to an artifact.""" code = "artifact_locator_resolution" operation = "artifact_locator_resolution" -class ManifestMismatchError(ServingIntegrationError): - """Serving manifest content does not match requested runtime facts.""" +class ManifestMismatchError(ArtifactRuntimeIntegrationError): + """Runtime artifact manifest content does not match requested facts.""" code = "manifest_mismatch" operation = "manifest_validation" -class PolicyMismatchError(ServingIntegrationError): - """Serving runtime policy does not match the artifact manifest.""" +class PolicyMismatchError(ArtifactRuntimeIntegrationError): + """Runtime artifact policy does not match the artifact manifest.""" code = "policy_mismatch" operation = "policy_validation" -class AuthorityValidationError(ServingIntegrationError): - """Retained binding authority failed validation.""" +class AuthorityValidationError(ArtifactRuntimeIntegrationError): + """Retained realization authority failed validation.""" code = "authority_validation" operation = "retained_acquire" -class SchemaMismatchError(ServingIntegrationError): - """Runtime tensor schema does not match the serving artifact schema.""" +class SchemaMismatchError(ArtifactRuntimeIntegrationError): + """Runtime tensor schema does not match the artifact schema.""" code = "schema_mismatch" operation = "schema_validation" worker_suspect = True -class 
AttachFinalizeError(ServingIntegrationError): +class AttachFinalizeError(ArtifactRuntimeIntegrationError): """Framework attach, process-after-load, or finalize failed.""" code = "attach_finalize" @@ -138,14 +138,14 @@ class AttachFinalizeError(ServingIntegrationError): worker_suspect = True -class RestoreBindingError(ServingIntegrationError): +class RestoreBindingError(ArtifactRuntimeIntegrationError): """Retained binding restore failed before runtime ownership transfer.""" code = "restore_binding" operation = "retained_acquire" -class OwnershipTransferError(ServingIntegrationError): +class OwnershipTransferError(ArtifactRuntimeIntegrationError): """Binding ownership transfer to runtime state failed.""" code = "ownership_transfer" @@ -153,36 +153,36 @@ class OwnershipTransferError(ServingIntegrationError): worker_suspect = True -class RuntimeSwapError(ServingIntegrationError): - """Serving binding swap failed after execution started.""" +class RuntimeSwapError(ArtifactRuntimeIntegrationError): + """Runtime binding swap failed after execution started.""" code = "runtime_swap" operation = "reload" worker_suspect = True -class SourceSubjectError(ServingIntegrationError): +class SourceSubjectError(ArtifactRuntimeIntegrationError): """Source selector resolution or broadcast payload handling failed.""" code = "source_subject" operation = "source_provider" -class SourceProviderError(ServingIntegrationError): +class SourceProviderError(ArtifactRuntimeIntegrationError): """Source provider, catalog, or cache policy failed.""" code = "source_provider" operation = "source_provider" -class PublicationRequiredError(ServingIntegrationError): +class PublicationRequiredError(ArtifactRuntimeIntegrationError): """A local-ready identity was used where durable publication is required.""" code = "publication_required" operation = "artifact_locator_validation" -class ReplicaPublicationError(ServingIntegrationError): +class ReplicaPublicationError(ArtifactRuntimeIntegrationError): 
"""Runtime-owned ephemeral replica publication failed.""" code = "replica_publication" @@ -225,10 +225,10 @@ def __init__( "RestoreBindingError", "RuntimeSwapError", "SchemaMismatchError", - "ServingIntegrationError", - "ServingIntegrationNotImplementedError", + "ArtifactRuntimeIntegrationError", + "ArtifactRuntimeNotImplementedError", "SourceProviderError", "SourceSubjectError", - "TensorCastServingRuntimeError", + "TensorCastRuntimeError", "capability_missing", ] diff --git a/tensorcast/serving/hosts.py b/tensorcast/artifact_runtime/host.py similarity index 90% rename from tensorcast/serving/hosts.py rename to tensorcast/artifact_runtime/host.py index a39103cc..e0bf1d7c 100644 --- a/tensorcast/serving/hosts.py +++ b/tensorcast/artifact_runtime/host.py @@ -1,8 +1,8 @@ # Copyright (c) 2026, TensorCast Team. -"""Public host capability protocols for serving runtime integrations. +"""Artifact-runtime host capability protocols for framework integrations. This module is intentionally lightweight: importing it must not import the -serving lifecycle implementation, builder stack, binding runtime, or store API. +runtime lifecycle implementation, builder stack, binding runtime, or store API. Framework integrations should use these DTOs and protocols to describe facts and capabilities; TensorCast core owns the lifecycle that consumes them. 
""" @@ -16,11 +16,12 @@ from dataclasses import dataclass, field from typing import Any, Protocol, cast -from tensorcast.serving.dto import RuntimeTensorView, ServingPlacement +from pydantic import BaseModel, ConfigDict + from tensorcast.types import ( SERVING_MANIFEST_TENSOR_NAME, - ServingBindingMemberRef, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeTopologyRef, ) PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION = 1 @@ -31,6 +32,38 @@ SOURCE_CATALOG_SCHEMA_VERSION = 1 +class RuntimeTensorView(BaseModel): + """Framework-neutral tensor identity view without live tensor payload.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + name: str + dtype: str + shape: tuple[int, ...] + stride: tuple[int, ...] + storage_offset: int = 0 + element_size: int | None = None + + +class RuntimePlacement(BaseModel): + """Stable runtime placement identity shared with framework integrations.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + topology: RuntimeTopologyRef + member: RuntimeBindingMemberRef + framework_payload: dict[str, Any] + identity_payload: dict[str, Any] + + def stable_identity_payload(self) -> dict[str, Any]: + return { + "topology": self.topology.model_dump(mode="python"), + "member": self.member.model_dump(mode="python"), + "framework_payload": self.framework_payload, + "identity_payload": self.identity_payload, + } + + @dataclass(frozen=True) class FrameworkIdentity: """Stable framework identity facts owned by a framework host.""" @@ -247,7 +280,7 @@ def collect_runtime_tensor_view( self, tensors: Mapping[str, object], ) -> tuple[RuntimeTensorView, ...]: - from tensorcast.serving import contract as tc_contract + import tensorcast.artifact_runtime.contract as tc_contract schema = tc_contract.collect_runtime_tensor_schema( cast(Any, tensors), remove_duplicate=False @@ -435,15 +468,15 @@ def execution_facts( ) -> MaterializationExecutionFacts: ... 
-def serving_placement_from_framework_facts( +def runtime_placement_from_framework_facts( *, identity_facts: PlacementIdentityFacts, admission_facts: PlacementAdmissionFacts | None = None, member_facts: PlacementMemberFacts, framework_payload: Mapping[str, object] | None = None, identity_payload: Mapping[str, object] | None = None, -) -> ServingPlacement: - """Build core-owned serving placement identity from host facts.""" +) -> RuntimePlacement: + """Build core-owned runtime placement identity from host facts.""" admission_facts = admission_facts or PlacementAdmissionFacts() placement_identity_payload = _stable_payload( @@ -512,12 +545,12 @@ def serving_placement_from_framework_facts( resolved_identity_payload = dict( identity_payload or cast(Mapping[str, object], placement_identity_payload) ) - return ServingPlacement( - topology=ServingTopologyRef( + return RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest=topology_digest, logical_topology_ref=(f"tensorcast://placement/{topology_digest[:16]}"), ), - member=ServingBindingMemberRef( + member=RuntimeBindingMemberRef( member_id=str(member_id), member_index=int(member_index), member_count=int(member_count), @@ -567,26 +600,26 @@ class RuntimeProfile: source_catalog_policy: SourceCatalogPolicy | None = None @classmethod - def from_config(cls, serving_config: object) -> "RuntimeProfile": - return cls.from_serving_config(serving_config) + def from_config(cls, runtime_config: object) -> "RuntimeProfile": + return cls.from_runtime_config(runtime_config) @classmethod - def from_serving_config(cls, serving_config: object) -> "RuntimeProfile": + def from_runtime_config(cls, runtime_config: object) -> "RuntimeProfile": return cls( runtime_config=RuntimeConfig( - _mapping_from_object(getattr(serving_config, "runtime", None)) + _mapping_from_object(getattr(runtime_config, "runtime", None)) ), materialization_policy=MaterializationPolicy( - _mapping_from_object(getattr(serving_config, "materialization", 
None)) + _mapping_from_object(getattr(runtime_config, "materialization", None)) ), source_bound_contract=SourceBoundContractProfile( _mapping_from_object( - getattr(serving_config, "source_bound_contract", None) + getattr(runtime_config, "source_bound_contract", None) ) ), manifest_policy=ManifestPolicy(), source_catalog_policy=SourceCatalogPolicy( - _mapping_from_object(getattr(serving_config, "source_catalog", None)) + _mapping_from_object(getattr(runtime_config, "source_catalog", None)) ), ) @@ -724,7 +757,7 @@ def admit(self, request: AdmissionRequest) -> AdmissionDecision: @dataclass(frozen=True) -class IntegrationHost: +class RuntimeHostCapabilities: framework: FrameworkHost placement: PlacementHost source_catalog: SourceCatalogProvider | None = None @@ -736,6 +769,12 @@ class IntegrationHost: admission: AdmissionPolicy | None = None +IntegrationHost = RuntimeHostCapabilities +RuntimeAdmissionDecision = AdmissionDecision +RuntimeAdmissionPolicy = AdmissionPolicy +RuntimeAdmissionRequest = AdmissionRequest + + def semantic_placement_digest( *, kind: str, @@ -811,7 +850,9 @@ def _stable_digest(value: object) -> str: "FrameworkHost", "FrameworkIdentity", "IntegrationHost", + "ManifestPolicy", "MaterializationExecutionFacts", + "MaterializationPolicy", "NativeLoadHost", "ObservabilitySink", "PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION", @@ -823,6 +864,15 @@ def _stable_digest(value: object) -> str: "RecipeCachePolicy", "RecipeTraceHost", "RECIPE_CACHE_POLICY_SCHEMA_VERSION", + "RuntimeConfig", + "RuntimeAdmissionDecision", + "RuntimeAdmissionPolicy", + "RuntimeAdmissionRequest", + "RuntimeHostCapabilities", + "RuntimePlacement", + "RuntimeProfile", + "SourceBoundContractProfile", + "SourceCatalogPolicy", "SourceCatalogProvider", "SourceCatalogRequest", "SOURCE_CATALOG_REQUEST_SCHEMA_VERSION", @@ -835,6 +885,6 @@ def _stable_digest(value: object) -> str: "TensorCastEvent", "TensorSurfaceHost", "TorchTensorHost", + "runtime_placement_from_framework_facts", 
"semantic_placement_digest", - "serving_placement_from_framework_facts", ] diff --git a/tensorcast/artifact_runtime/intent.py b/tensorcast/artifact_runtime/intent.py new file mode 100644 index 00000000..7b016af5 --- /dev/null +++ b/tensorcast/artifact_runtime/intent.py @@ -0,0 +1,75 @@ +# Copyright (c) 2026, TensorCast Team. + +"""Artifact runtime intent DTOs.""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + +from tensorcast.artifact_runtime.errors import AuthorityValidationError +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.policy import RuntimePolicy +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, +) + +if TYPE_CHECKING: + from tensorcast.artifact_runtime.host import RecipeCachePolicy, SourceSelector + + +@dataclass(frozen=True) +class BootstrapPolicy: + fields: Mapping[str, object] = field(default_factory=dict) + + +class RuntimeIntent: + """Marker base class for artifact runtime lifecycle intent DTOs.""" + + +@dataclass(frozen=True) +class ExistingRuntimeArtifact(RuntimeIntent): + artifact_locator: ArtifactLocator | object + policy: RuntimePolicy | object | None = None + + +@dataclass(frozen=True) +class LocalSourceBootstrap(RuntimeIntent): + source_selector: SourceSelector + bootstrap_policy: Any + cache_policy: RecipeCachePolicy | None = None + + +@dataclass(frozen=True) +class RetainedBindingAcquire(RuntimeIntent): + authority: ParsedRetainedRealizationAuthority + + def __post_init__(self) -> None: + if not isinstance(self.authority, ParsedRetainedRealizationAuthority): + raise AuthorityValidationError( + "RetainedBindingAcquire.authority must be " + "ParsedRetainedRealizationAuthority" + ) + + +@dataclass(frozen=True) +class RequestContext: + framework_config: object | None = None + model_config: object | None = None + target_device: object | None 
= None + timeout_s: float | None = 30.0 + + +RuntimeRequestContext = RequestContext + + +__all__ = [ + "BootstrapPolicy", + "ExistingRuntimeArtifact", + "LocalSourceBootstrap", + "RequestContext", + "RetainedBindingAcquire", + "RuntimeIntent", + "RuntimeRequestContext", +] diff --git a/tensorcast/serving/_runtime_impl/lifecycle.py b/tensorcast/artifact_runtime/lifecycle.py similarity index 73% rename from tensorcast/serving/_runtime_impl/lifecycle.py rename to tensorcast/artifact_runtime/lifecycle.py index 94a22d3d..e0ad19bb 100644 --- a/tensorcast/serving/_runtime_impl/lifecycle.py +++ b/tensorcast/artifact_runtime/lifecycle.py @@ -1,10 +1,9 @@ # Copyright (c) 2026, TensorCast Team. -"""Serving lifecycle implementation for TensorCast framework integrations. +"""Artifact-runtime lifecycle implementation for framework integrations. -New framework integrations should prefer the narrow public modules -``tensorcast.serving.runtime``, ``tensorcast.serving.hosts`` and -``tensorcast.serving.testing``. This module owns lifecycle orchestration and -keeps low-level helpers out of the framework-facing host/runtime modules. +New framework integrations should prefer the artifact-runtime public modules +and runtime testing fixtures. This module owns lifecycle orchestration and keeps +low-level helpers out of the framework-facing host/runtime modules. 
""" from __future__ import annotations @@ -13,22 +12,34 @@ import json import logging import os -from collections.abc import Callable, Iterator, Mapping, Sequence -from contextlib import contextmanager +import time +from collections.abc import Callable, Mapping, Sequence from dataclasses import dataclass, replace -from pathlib import Path from types import SimpleNamespace -from typing import Any, cast +from typing import Any, NoReturn, cast import torch import tensorcast as tc +import tensorcast.artifact_runtime.binding.execution as tc_binding_runtime +import tensorcast.artifact_runtime.config as tc_runtime_config +import tensorcast.artifact_runtime.contract as tc_contract +import tensorcast.artifact_runtime.diagnostics as tc_diagnostics +import tensorcast.artifact_runtime.intent as tc_runtime_intent +import tensorcast.artifact_runtime.publication.replica as tc_replica_publication +import tensorcast.artifact_runtime.readiness as tc_readiness +import tensorcast.artifact_runtime.recipe.local_ready as tc_local_ready +import tensorcast.artifact_runtime.recipe.semantic_validation as tc_semantic_validation +import tensorcast.artifact_runtime.recipe.tensor_schema as tc_tensor_schema +import tensorcast.artifact_runtime.request_facts as tc_request_facts +import tensorcast.artifact_runtime.source as tc_source_catalog from tensorcast.api.store.common import canonical_index_to_bytes from tensorcast.api.store.realization_kernel import ( ArtifactRealizationHandle, ArtifactRealizationReport, ArtifactRealizationSpec, RealizationTargetPlan, + ResolvedArtifactSelection, artifact_realization_report_to_dict, emit_artifact_realization_profile_event, envelope_for_runtime_attachment, @@ -37,45 +48,114 @@ resolve_artifact_selection, ) from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry -from tensorcast.serving import binding_runtime as tc_binding_runtime -from tensorcast.serving import config as tc_config -from tensorcast.serving import contract as tc_contract -from 
tensorcast.serving import diagnostics as tc_diagnostics -from tensorcast.serving import dto as tc_dto -from tensorcast.serving import errors as tc_errors -from tensorcast.serving import hosts as tc_hosts -from tensorcast.serving import local_ready as tc_local_ready -from tensorcast.serving import policy as tc_policy -from tensorcast.serving import readiness as tc_readiness -from tensorcast.serving import recipe_build as tc_recipe_build -from tensorcast.serving import replica_publication as tc_replica_publication -from tensorcast.serving import retained_binding as tc_retained_binding -from tensorcast.serving import runtime_attachment as tc_runtime_attachment -from tensorcast.serving import runtime_config as tc_runtime_config -from tensorcast.serving import runtime_contract as tc_runtime_contract -from tensorcast.serving import runtime_intent as tc_runtime_intent -from tensorcast.serving import runtime_view as tc_runtime_view -from tensorcast.serving import session as tc_session -from tensorcast.serving import source_catalog as tc_source_catalog -from tensorcast.serving.builder import compiler as tc_compiler -from tensorcast.serving.builder import materialization as tc_materialization -from tensorcast.serving.builder import publication as tc_publication -from tensorcast.serving.builder import recipe_cache as tc_recipe_cache -from tensorcast.serving.builder import recipe_validation as tc_recipe_validation -from tensorcast.serving.builder import semantic_validation as tc_semantic_validation -from tensorcast.serving.builder import tensor_schema as tc_tensor_schema -from tensorcast.serving.builder import trace_cache as tc_trace_cache -from tensorcast.serving.builder.compiler import TracePlan -from tensorcast.serving.resolver import ( - ResolvedServingArtifact, - ServingArtifactResolver, +from tensorcast.artifact_runtime.artifact.resolver import ( + ResolvedRuntimeArtifact, + RuntimeArtifactResolver, canonical_index_from_descriptor, - is_reserved_serving_tensor_name, + 
is_reserved_runtime_tensor_name, +) +from tensorcast.artifact_runtime.attachment import ( + RuntimeAttachment, + RuntimeBindingState, + RuntimeBindingView, + RuntimeStateSeed, +) +from tensorcast.artifact_runtime.binding.retained import ( + RestoredRetainedBinding, + restore_prepared_local_ready_binding, + restore_retained_binding, + runtime_restore_rejection_reason, +) +from tensorcast.artifact_runtime.dto import ( + FrameworkIntegrationContext, + PreparedRuntimeArtifact, + RuntimeBindingValue, + RuntimePlacement, +) +from tensorcast.artifact_runtime.errors import ( + AdmissionRejectedError, + ArtifactLocatorResolutionError, + ArtifactRuntimeIntegrationError, + ArtifactRuntimeNotImplementedError, + AttachFinalizeError, + AuthorityValidationError, + CapabilityMissingError, + ConfigConflictError, + ManifestMismatchError, + OwnershipTransferError, + PlacementAdmissionError, + RestoreBindingError, + SchemaMismatchError, + SourceProviderError, + SourceSubjectError, +) +from tensorcast.artifact_runtime.errors import ( + capability_missing as _capability_missing, +) +from tensorcast.artifact_runtime.host import ( + AdmissionDecision, + AdmissionRequest, + DefaultAdmissionPolicy, + FrameworkHost, + FrameworkIdentity, + IntegrationHost, + MaterializationExecutionFacts, + MaterializationPolicy, + PlacementAdmissionFacts, + PlacementIdentityFacts, + PlacementMemberFacts, + RecipeCachePolicy, + RuntimeProfile, + SourceCatalogRequest, + SourceDownloadPolicy, + SourceHost, + SourceSelector, + SourceSubjectCoordinator, + TensorSurfaceHost, + TorchTensorHost, + runtime_placement_from_framework_facts, +) +from tensorcast.artifact_runtime.locator import ( + ArtifactLocator, +) +from tensorcast.artifact_runtime.policy import ( + RuntimePolicy, +) +from tensorcast.artifact_runtime.recipe.build import ( + RecipeBuildCacheConfig, + RecipeBuildSession, + RecipeBuildSessionRequest, + RuntimeBindingPlan, + recipe_build_cache_config_from_policy, +) +from 
tensorcast.artifact_runtime.recipe.build import ( + build_recipe_session as build_recipe_session_from_request, +) +from tensorcast.artifact_runtime.recipe.compiler import ( + TensorcastSemanticValidationSpec, + TensorSchemaEntry, +) +from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan +from tensorcast.artifact_runtime.source import ( + SourceSubject, + is_public_disk_source_subject, + resolve_source_subject, + source_subject_broadcast_payload, + source_subject_from_broadcast_payload, +) +from tensorcast.artifact_runtime.view import ( + RuntimeWorkerView, + source_selection_projection_from_artifact_realization_report, + source_selection_projection_from_execution_diagnostics, + source_selection_projection_from_materialization_diagnostics, +) +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, ) from tensorcast.types import ( CollectivePolicy, FinalizeClass, - ServingSupportLevel, + RuntimeSupportLevel, ) ArtifactError = tc.ArtifactError @@ -83,208 +163,136 @@ BindingReservationCapability = tc.BindingReservationCapability BindingValueRef = tc.BindingValueRef BuilderMode = tc.BuilderMode -CompiledServingRecipe = tc_compiler.CompiledServingRecipe -BindingFinalizeMaterializationResult = ( - tc_materialization.BindingFinalizeMaterializationResult -) DEFAULT_RUNTIME_PROFILE = tc_runtime_config.DEFAULT_RUNTIME_PROFILE LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION = ( tc_local_ready.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION ) _LOGGER = logging.getLogger(__name__) -FamilyReadiness = tc_dto.FamilyReadiness -FrameworkIntegrationContext = tc_dto.FrameworkIntegrationContext -PreparedServingArtifact = tc_dto.PreparedServingArtifact -ServingBindingValue = tc_dto.ServingBindingValue PublishedModelVersion = tc.PublishedModelVersion -ServingBindingPlan = tc_recipe_build.ServingBindingPlan -RecipeBuildCacheConfig = tc_recipe_build.RecipeBuildCacheConfig -RecipeBuildRunResult = tc_recipe_build.RecipeBuildRunResult 
-RecipeCacheLookupResult = tc_recipe_build.RecipeCacheLookupResult -RecipeCacheWriteResult = tc_recipe_build.RecipeCacheWriteResult -RecipeBuildSession = tc_recipe_build.RecipeBuildSession -COMPILED_RECIPE_MEMORY_CACHE = tc_recipe_build.COMPILED_RECIPE_MEMORY_CACHE -TRACE_PLAN_MEMORY_CACHE = tc_recipe_build.TRACE_PLAN_MEMORY_CACHE -RecipeCompileInputs = tc_compiler.RecipeCompileInputs -RecipePublicationContext = tc_publication.RecipePublicationContext -ParsedRetainedServingBindingAuthority = ( - tc_retained_binding.ParsedRetainedServingBindingAuthority -) GroupRealizationAcquireRef = tc.GroupRealizationAcquireRef -RuntimeTensorView = tc_dto.RuntimeTensorView SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 = ( - tc_runtime_contract.SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 + tc_contract.SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 ) SERVING_MANIFEST_TENSOR_NAME = tc.SERVING_MANIFEST_TENSOR_NAME -ServingBindingState = tc_session.ServingBindingState -ServingArtifactManifest = tc.ServingArtifactManifest -ServingConfig = tc_config.ServingConfig -ReplicaPublicationPolicy = tc_config.ReplicaPublicationPolicy -ServingBindingMemberRef = tc.ServingBindingMemberRef -ServingPlacement = tc_dto.ServingPlacement -ServingRuntimePolicy = tc.ServingRuntimePolicy -SourceBoundContractState = tc_runtime_contract.SourceBoundContractState -source_bound_contract_profile_fields = ( - tc_runtime_contract.source_bound_contract_profile_fields -) +RuntimeArtifactManifest = tc.RuntimeArtifactManifest +TensorCastRuntimeConfig = tc_runtime_config.TensorCastRuntimeConfig +ReplicaPublicationPolicy = tc_runtime_config.ReplicaPublicationPolicy +RuntimeBindingMemberRef = tc.RuntimeBindingMemberRef +RuntimeArtifactPolicy = tc.RuntimeArtifactPolicy +SourceBoundContractState = tc_contract.SourceBoundContractState +source_bound_contract_profile_fields = tc_contract.source_bound_contract_profile_fields SourceCatalog = tc_source_catalog.SourceCatalog SOURCE_CATALOG_SCHEMA_VERSION = 
tc_source_catalog.SOURCE_CATALOG_SCHEMA_VERSION -AdmissionRejectedError = tc_errors.AdmissionRejectedError -ArtifactLocatorResolutionError = tc_errors.ArtifactLocatorResolutionError -AttachFinalizeError = tc_errors.AttachFinalizeError -AuthorityValidationError = tc_errors.AuthorityValidationError -CapabilityMissingError = tc_errors.CapabilityMissingError -ConfigConflictError = tc_errors.ConfigConflictError -ManifestMismatchError = tc_errors.ManifestMismatchError -OwnershipTransferError = tc_errors.OwnershipTransferError -PlacementAdmissionError = tc_errors.PlacementAdmissionError -PolicyMismatchError = tc_errors.PolicyMismatchError -PublicationRequiredError = tc_errors.PublicationRequiredError -ReplicaPublicationError = tc_errors.ReplicaPublicationError -RestoreBindingError = tc_errors.RestoreBindingError -RuntimeSwapError = tc_errors.RuntimeSwapError -SchemaMismatchError = tc_errors.SchemaMismatchError -ServingIntegrationError = tc_errors.ServingIntegrationError -ServingIntegrationNotImplementedError = tc_errors.ServingIntegrationNotImplementedError -SourceProviderError = tc_errors.SourceProviderError -SourceSubjectError = tc_errors.SourceSubjectError -TensorCastServingRuntimeError = tc_errors.TensorCastServingRuntimeError -_capability_missing = tc_errors.capability_missing - -RuntimeAttachment = tc_runtime_attachment.RuntimeAttachment -RuntimeBindingState = tc_runtime_attachment.RuntimeBindingState -RuntimeBindingView = tc_runtime_attachment.RuntimeBindingView -RuntimeStateSeed = tc_runtime_attachment.RuntimeStateSeed - -BindingValueRefProjection = tc_runtime_view.BindingValueRefProjection -MaterializationDiagnosticsProjection = ( - tc_runtime_view.MaterializationDiagnosticsProjection -) -PublishedReplicaProjection = tc_runtime_view.PublishedReplicaProjection -ReloadRequestProjection = tc_runtime_view.ReloadRequestProjection -ReloadResponseProjection = tc_runtime_view.ReloadResponseProjection -RuntimeEndpointProjection = tc_runtime_view.RuntimeEndpointProjection 
-RuntimeWorkerView = tc_runtime_view.RuntimeWorkerView -SourceBoundContractProjection = tc_runtime_view.SourceBoundContractProjection -SourceSelectionProjection = tc_runtime_view.SourceSelectionProjection -WeightVersionProjection = tc_runtime_view.WeightVersionProjection -source_selection_projection_from_artifact_realization_report = ( - tc_runtime_view.source_selection_projection_from_artifact_realization_report -) -source_selection_projection_from_execution_diagnostics = ( - tc_runtime_view.source_selection_projection_from_execution_diagnostics -) -source_selection_projection_from_materialization_diagnostics = ( - tc_runtime_view.source_selection_projection_from_materialization_diagnostics + +ModelRuntimeRequestFactsError = tc_request_facts.ModelRuntimeRequestFactsError +resolve_model_runtime_request_facts = ( + tc_request_facts.resolve_model_runtime_request_facts ) -# Host capability contracts live in hosts.py. Lifecycle uses module-local -# aliases only to keep the orchestration code readable. 
-AdmissionDecision = tc_hosts.AdmissionDecision -AdmissionPolicy = tc_hosts.AdmissionPolicy -AdmissionRequest = tc_hosts.AdmissionRequest -CollectiveHost = tc_hosts.CollectiveHost -DefaultAdmissionPolicy = tc_hosts.DefaultAdmissionPolicy -FinalizeHookHost = tc_hosts.FinalizeHookHost -FinalizePhase = tc_hosts.FinalizePhase -FinalizePolicy = tc_hosts.FinalizePolicy -FrameworkHost = tc_hosts.FrameworkHost -FrameworkIdentity = tc_hosts.FrameworkIdentity -IntegrationHost = tc_hosts.IntegrationHost -ManifestPolicy = tc_hosts.ManifestPolicy -MaterializationExecutionFacts = tc_hosts.MaterializationExecutionFacts -MaterializationPolicy = tc_hosts.MaterializationPolicy -NativeLoadHost = tc_hosts.NativeLoadHost -ObservabilitySink = tc_hosts.ObservabilitySink -PlacementAdmissionFacts = tc_hosts.PlacementAdmissionFacts -PlacementHost = tc_hosts.PlacementHost -PlacementIdentityFacts = tc_hosts.PlacementIdentityFacts -PlacementMemberFacts = tc_hosts.PlacementMemberFacts -RecipeCachePolicy = tc_hosts.RecipeCachePolicy -RecipeTraceHost = tc_hosts.RecipeTraceHost -RuntimeConfig = tc_hosts.RuntimeConfig -RuntimeProfile = tc_hosts.RuntimeProfile -SourceBoundContractProfile = tc_hosts.SourceBoundContractProfile -SourceCatalogPolicy = tc_hosts.SourceCatalogPolicy -SourceCatalogProvider = tc_hosts.SourceCatalogProvider -SourceCatalogRequest = tc_hosts.SourceCatalogRequest -SourceDownloadPolicy = tc_hosts.SourceDownloadPolicy -SourceHost = tc_hosts.SourceHost -SourceSelector = tc_hosts.SourceSelector -SourceSubjectCoordinator = tc_hosts.SourceSubjectCoordinator -TensorCastEvent = tc_hosts.TensorCastEvent -TensorSurfaceHost = tc_hosts.TensorSurfaceHost -TorchTensorHost = tc_hosts.TorchTensorHost -semantic_placement_digest = tc_hosts.semantic_placement_digest -serving_placement_from_framework_facts = tc_hosts.serving_placement_from_framework_facts -TensorcastSemanticValidationSpec = tc_compiler.TensorcastSemanticValidationSpec -TensorcastServingFacts = tc_compiler.TensorcastServingFacts 
-TensorSchemaEntry = tc_compiler.TensorSchemaEntry -read_source_bound_contract_state = tc_runtime_contract.read_source_bound_contract_state +read_source_bound_contract_state = tc_contract.read_source_bound_contract_state resolve_runtime_config_profile = tc_runtime_config.resolve_runtime_config_profile -RUNTIME_ENDPOINT_PROJECTION_SCHEMA_VERSION = ( - tc_runtime_view.RUNTIME_ENDPOINT_PROJECTION_SCHEMA_VERSION -) -WEIGHT_VERSION_PROJECTION_SCHEMA_VERSION = ( - tc_runtime_view.WEIGHT_VERSION_PROJECTION_SCHEMA_VERSION -) -RELOAD_RESPONSE_PROJECTION_SCHEMA_VERSION = ( - tc_runtime_view.RELOAD_RESPONSE_PROJECTION_SCHEMA_VERSION -) -PUBLISHED_REPLICA_PROJECTION_SCHEMA_VERSION = ( - tc_runtime_view.PUBLISHED_REPLICA_PROJECTION_SCHEMA_VERSION -) -SOURCE_SELECTION_PROJECTION_SCHEMA_VERSION = ( - tc_runtime_view.SOURCE_SELECTION_PROJECTION_SCHEMA_VERSION -) -SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION = ( - tc_policy.SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION -) binding_layout_debug_payload = tc_diagnostics.binding_layout_debug_payload binding_layout_profile_fields = tc_diagnostics.binding_layout_profile_fields binding_layout_tensor_count = tc_diagnostics.binding_layout_tensor_count -SERVING_POLICY_SCHEMA_VERSION = tc_policy.SERVING_POLICY_SCHEMA_VERSION -ServingArtifactLocator = tc_policy.ServingArtifactLocator -ServingPolicy = tc_policy.ServingPolicy -normalize_serving_reload_request_payload = ( - tc_policy.normalize_serving_reload_request_payload -) -merge_serving_reload_extra_config = tc_policy.merge_serving_reload_extra_config -load_source_tensors_for_recipe = tc_materialization.load_source_tensors_for_recipe -materialize_recipe_copy_plan_tensors = ( - tc_materialization.materialize_recipe_copy_plan_tensors -) -materialize_pure_transform_serving_tensors = ( - tc_materialization.materialize_pure_transform_serving_tensors -) -materialize_binding_finalize_serving_tensors = ( - tc_materialization.materialize_binding_finalize_serving_tensors -) -collect_serving_tensors_from_model = 
( - tc_materialization.collect_serving_tensors_from_model -) -run_binding_finalize_semantic_validation = ( - tc_materialization.run_binding_finalize_semantic_validation -) -validate_binding_finalize_tensor_schema = ( - tc_materialization.validate_binding_finalize_tensor_schema -) -complete_pure_transform_recipe_publication_from_recipe = ( - tc_publication.complete_pure_transform_recipe_publication -) PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION = 1 PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION = 1 SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION = 1 RECIPE_CACHE_POLICY_SCHEMA_VERSION = 1 SOURCE_CATALOG_REQUEST_SCHEMA_VERSION = 1 +__all__ = [ + "AdmissionDecision", + "AdmissionRejectedError", + "AdmissionRequest", + "ArtifactLocatorResolutionError", + "ArtifactRuntimeIntegration", + "ArtifactRuntimeIntegrationError", + "ArtifactRuntimeNotImplementedError", + "ArtifactRuntimeSession", + "AttachFinalizeError", + "AuthorityValidationError", + "BootstrapPolicy", + "CapabilityMissingError", + "ConfigConflictError", + "DefaultAdmissionPolicy", + "ExistingRuntimeArtifact", + "FinalizeClass", + "FrameworkIdentity", + "IntegrationHost", + "LocalReadyBindingContract", + "LocalReadyManifestCarrierResult", + "LocalReadyMaterializationIdentity", + "LocalSourceBootstrap", + "ManifestMismatchError", + "MaterializationExecutionFacts", + "OwnershipTransferError", + "PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION", + "PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION", + "PlacementAdmissionError", + "PlacementAdmissionFacts", + "PlacementIdentityFacts", + "PlacementMemberFacts", + "RECIPE_CACHE_POLICY_SCHEMA_VERSION", + "SERVING_MANIFEST_TENSOR_NAME", + "SOURCE_CATALOG_REQUEST_SCHEMA_VERSION", + "SOURCE_CATALOG_SCHEMA_VERSION", + "SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION", + "RecipeBuildSessionRequest", + "RecipeCachePolicy", + "RequestContext", + "RestoreBindingError", + "RetainedBindingAcquire", + "RuntimeAttachment", + "RuntimeBindingMaterialization", + "RuntimeBindingPlan", + "RuntimeBindingResult", + 
"RuntimeBindingState", + "RuntimeBindingView", + "RuntimeLoadResult", + "RuntimePlacement", + "RuntimeProfile", + "RuntimeReloadResult", + "RuntimeStateSeed", + "RuntimeSupportLevel", + "RuntimeWorkerView", + "SchemaMismatchError", + "SourceCatalogRequest", + "SourceDownloadPolicy", + "SourceHost", + "SourceProviderError", + "SourceSelector", + "SourceSubject", + "TensorSchemaEntry", + "TorchTensorHost", + "TensorcastSemanticValidationSpec", + "_DirectRuntimeLoad", + "_LocalReadyBootstrap", + "_LocalReadyFinalize", + "_RetainedBindingAcquire", + "_RuntimeReload", + "bind_runtime_artifact", + "build_local_ready_prepared_artifact", + "is_runtime_binding_swap_capable", + "local_ready_current_value_summary_fields", + "restore_prepared_local_ready_binding", + "restore_retained_binding", + "runtime_binding_state_from_runtime_view", + "runtime_placement_from_framework_facts", + "source_selection_projection_from_artifact_realization_report", + "source_selection_projection_from_execution_diagnostics", + "source_selection_projection_from_materialization_diagnostics", + "source_subject_broadcast_payload", + "source_subject_from_broadcast_payload", + "swap_runtime_artifact", +] + BootstrapPolicy = tc_runtime_intent.BootstrapPolicy -ServingIntent = tc_runtime_intent.ServingIntent -ExistingServingArtifact = tc_runtime_intent.ExistingServingArtifact +RuntimeIntent = tc_runtime_intent.RuntimeIntent +ExistingRuntimeArtifact = tc_runtime_intent.ExistingRuntimeArtifact LocalSourceBootstrap = tc_runtime_intent.LocalSourceBootstrap RetainedBindingAcquire = tc_runtime_intent.RetainedBindingAcquire RequestContext = tc_runtime_intent.RequestContext @@ -313,16 +321,19 @@ def attach_and_finalize( run_post_bind_finalize: bool = True, expected_tensor_schema_hash: str | None = None, semantic_validation_spec: Any | None = None, + model_runtime_spec: ArtifactRealizationSpec | None = None, ) -> RuntimeBindingState: owner: Any = binding_handle transferred = False try: + attach_start = 
time.perf_counter() self._emit("runtime_materialization.attach.start", state_seed) self._attach_bound_tensors( model, tensors, replace_meta_params=replace_meta_params, ) + attach_done = time.perf_counter() canonical = self._collect_runtime_tensors( model, remove_duplicate=False, @@ -372,16 +383,35 @@ def attach_and_finalize( if callable(transfer_to_runtime): owner = transfer_to_runtime() transferred = True + finalize_done = time.perf_counter() view = state_seed.runtime_view() + realization_report = state_seed.realization_report + if realization_report is not None: + realization_report = replace( + realization_report, + runtime_attach_sec=max(0.0, attach_done - attach_start), + runtime_finalize_sec=max(0.0, finalize_done - attach_done), + total_sec=max(0.0, finalize_done - attach_start), + ) realization_handle = _runtime_attachment_realization_handle( - report=state_seed.realization_report, + report=realization_report, binding_handle=binding_handle, owner=owner, ) model_runtime_ref: dict[str, RuntimeBindingState] = {} - model_runtime_handle = _model_runtime_realization_handle( - context=context, - target_device=target_device, + model_runtime_handle = _model_runtime_realization_handle_for_spec( + spec=( + _model_runtime_spec_with_context_defaults( + spec=model_runtime_spec, + context=context, + target_device=target_device, + ) + if model_runtime_spec is not None + else _model_runtime_spec_for_context( + context=context, + target_device=target_device, + ) + ), runtime_attachment_handle=realization_handle, attach_fn=lambda **_kwargs: model_runtime_ref["state"], ) @@ -407,6 +437,9 @@ def attach_and_finalize( return state except OwnershipTransferError: raise + except ModelRuntimeRequestFactsError: + self._close_quietly(owner) + raise except SchemaMismatchError: self._close_quietly(owner) raise @@ -449,13 +482,15 @@ def _run_semantic_validation( model_config: object | None, ) -> Any: if getattr(spec, "kind", None) == "none": - return 
evaluate_semantic_validation_spec(spec, None) + return tc_semantic_validation.evaluate_semantic_validation_spec(spec, None) hook_host = self.host.framework semantic_probes = getattr(hook_host, "semantic_probes", None) actual_payload = ( semantic_probes(model, model_config) if callable(semantic_probes) else None ) - return evaluate_semantic_validation_spec(spec, actual_payload) + return tc_semantic_validation.evaluate_semantic_validation_spec( + spec, actual_payload + ) def _surface(self) -> TensorSurfaceHost: if self.host.tensor_surface is None: @@ -558,7 +593,7 @@ class _HostMaterializationRequest: @dataclass(frozen=True) -class _DirectServingLoad: +class _DirectRuntimeLoad: artifact_locator: Any | None = None policy: Any | None = None materialization: Any | None = None @@ -566,7 +601,7 @@ class _DirectServingLoad: source_bound_contract_state: Any | None = None source_bound_contract_path: str | None = None execution_facts: Mapping[str, Any] | None = None - operation_scope: str = "startup.direct_serving_artifact.bind" + operation_scope: str = "startup.direct_runtime_artifact.bind" require_materialization_options: bool = False framework_config: Any | None = None model_config: Any | None = None @@ -574,21 +609,23 @@ class _DirectServingLoad: expected_member: Any | None = None timeout_s: float | None = 30.0 artifact_ref: str | None = None - resolved_artifact: ResolvedServingArtifact | None = None + source_selection: ResolvedArtifactSelection | None = None + resolved_artifact: ResolvedRuntimeArtifact | None = None model: Any | None = None + model_runtime_spec: ArtifactRealizationSpec | None = None @dataclass(frozen=True) -class ServingLoadResult: +class RuntimeLoadResult: model: Any | None = None runtime_state: RuntimeBindingState | None = None runtime_view: RuntimeBindingView | None = None - resolved_artifact: ResolvedServingArtifact | None = None + resolved_artifact: ResolvedRuntimeArtifact | None = None binding_result: RuntimeBindingResult | None = None 
@dataclass(frozen=True) -class _ServingReload: +class _RuntimeReload: current_state: RuntimeBindingState | Any artifact_locator: Any | None = None policy: Any | None = None @@ -604,27 +641,27 @@ class _ServingReload: model_config: Any | None = None target_device: Any | None = None artifact_ref: str | None = None - resolved_artifact: ResolvedServingArtifact | None = None + resolved_artifact: ResolvedRuntimeArtifact | None = None model: Any | None = None @dataclass(frozen=True) -class ServingReloadResult: +class RuntimeReloadResult: runtime_state: RuntimeBindingState | None = None runtime_view: RuntimeBindingView | None = None - resolved_artifact: ResolvedServingArtifact | None = None + resolved_artifact: ResolvedRuntimeArtifact | None = None binding_result: RuntimeBindingResult | None = None @dataclass(frozen=True) -class _ServingArtifactPreflight: - resolved_artifact: ResolvedServingArtifact - serving_runtime_policy: Any | None +class _RuntimeArtifactPreflight: + resolved_artifact: ResolvedRuntimeArtifact + runtime_artifact_policy: Any | None @dataclass(frozen=True) class _RetainedBindingAcquire: - authority: Any | None = None + authority: ParsedRetainedRealizationAuthority | None = None framework_config: Any | None = None model_config: Any | None = None target_device: Any | None = None @@ -633,6 +670,7 @@ class _RetainedBindingAcquire: client: Any | None = None restore_fn: Any | None = None timeout_s: float | None = 30.0 + model_runtime_spec: ArtifactRealizationSpec | None = None @dataclass(frozen=True) @@ -648,7 +686,7 @@ class _LocalReadyBootstrap: """Internal lowering payload for ``LocalSourceBootstrap``. This is deliberately private: framework integrations enter through - ``ServingIntegration.start(LocalSourceBootstrap, context)`` and host facts. + ``ArtifactRuntimeIntegration.start(LocalSourceBootstrap, context)`` and host facts. 
""" source_selector: SourceSelector | Any | None = None @@ -673,6 +711,7 @@ class _LocalReadyBootstrap: source_subject: Any | None = None placement: Any | None = None source_artifact_ref: str | None = None + source_selection: ResolvedArtifactSelection | None = None serving_manifest_ref: str | None = None representation_contract_hash: str | None = None serving_build_digest: str | None = None @@ -700,6 +739,7 @@ class _LocalReadyBootstrap: framework_version: str | None = None adapter_version: str | None = None serving_abi_version: str | None = None + model_runtime_spec: ArtifactRealizationSpec | None = None @dataclass(frozen=True) @@ -718,6 +758,7 @@ class _LocalReadyFinalize: source_bound_contract_state: Any source_bound_contract_path: str target_device: Any + source_selection: ResolvedArtifactSelection | None = None manifest_bytes: bytes | None = None framework_config: Any | None = None model_config: Any | None = None @@ -737,15 +778,16 @@ class _LocalReadyFinalize: framework_version: str | None = None adapter_version: str | None = None serving_abi_version: str | None = None + model_runtime_spec: ArtifactRealizationSpec | None = None @dataclass(frozen=True) -class LocalReadyServingResult: +class LocalReadyRuntimeResult: model: Any | None = None runtime_state: RuntimeBindingState | None = None runtime_view: RuntimeBindingView | None = None - prepared: PreparedServingArtifact | None = None - binding_value: ServingBindingValue | None = None + prepared: PreparedRuntimeArtifact | None = None + binding_value: RuntimeBindingValue | None = None recipe: Any | None = None current_value: Any | None = None binding: Any | None = None @@ -756,26 +798,6 @@ class LocalReadyServingResult: realization_report: ArtifactRealizationReport | None = None -@dataclass(frozen=True) -class RecipeBuildSessionRequest: - source_subject: SourceSubject | Any | None = None - framework_config: Any | None = None - model_config: Any | None = None - placement: ServingPlacement | None = None - 
cache_config: Any | None = None - identity: ServingBindingPlan | None = None - trace_cache_schema_version: int | None = None - tp_rank: int | None = None - tp_world_size: int | None = None - - -@dataclass(frozen=True) -class RecipeBuildResult: - session: RecipeBuildSession - recipe: Any | None = None - diagnostics: Mapping[str, Any] | None = None - - @dataclass(frozen=True) class LocalReadyBindingContract: excluded_names: tuple[str, ...] @@ -836,7 +858,7 @@ def _canonical_index_bytes_from_tensors( def _canonical_index_bytes_for_runtime_selection( *, - resolved: ResolvedServingArtifact | Any | None, + resolved: ResolvedRuntimeArtifact | Any | None, tensors: Mapping[str, torch.Tensor], ) -> bytes: descriptor = getattr(resolved, "descriptor", None) @@ -863,11 +885,12 @@ def _target_layout_digest_for_runtime_attachment( def _runtime_attachment_report_for_resolved( *, - resolved: ResolvedServingArtifact | Any, + resolved: ResolvedRuntimeArtifact | Any, tensors: Mapping[str, torch.Tensor], binding_handle: Any | None, target_device: Any, tensor_schema_hash: str, + source_selection: ResolvedArtifactSelection | None = None, execution_diagnostics: Any | None = None, materialization_diagnostics: Any | None = None, ) -> ArtifactRealizationReport: @@ -885,14 +908,14 @@ def _runtime_attachment_report_for_resolved( ) envelope = envelope_for_runtime_attachment(tensors, retained=False) envelope.validate_for_target(target_plan) - selection = resolve_artifact_selection( + selection = source_selection or resolve_artifact_selection( artifact_id=str(getattr(resolved, "artifact_ref", "") or ""), canonical_index_bytes=_canonical_index_bytes_for_runtime_selection( resolved=resolved, tensors=tensors, ), tensor_names=tuple(str(name) for name in tensors), - artifact_profile="serving_artifact", + artifact_profile="runtime_artifact", authority_scope="daemon_mediated_runtime_attachment", ) return report_for_runtime_attachment( @@ -908,12 +931,13 @@ def _runtime_attachment_report_for_resolved( 
def _runtime_attachment_report_for_retained( *, - authority: tc_retained_binding.ParsedRetainedServingBindingAuthority, + authority: ParsedRetainedRealizationAuthority, tensors: Mapping[str, torch.Tensor], binding_handle: Any | None, target_device: Any, tensor_schema_hash: str, reservation_bytes: int, + source_selection: ResolvedArtifactSelection | None = None, ) -> ArtifactRealizationReport: binding_layout_id = _optional_text( getattr(binding_handle, "binding_layout_id", None) @@ -939,7 +963,7 @@ def _runtime_attachment_report_for_retained( or authority.local_serving_ref or authority.binding_value_ref.binding_value_id ) - selection = resolve_artifact_selection( + selection = source_selection or resolve_artifact_selection( artifact_id=str(artifact_id), canonical_index_bytes=_canonical_index_bytes_from_tensors(tensors), tensor_names=tuple(str(name) for name in tensors), @@ -965,6 +989,7 @@ def _runtime_attachment_report_for_artifact_id( tensor_schema_hash: str, artifact_profile: str, authority_scope: str, + source_selection: ResolvedArtifactSelection | None = None, retained: bool = False, reservation_bytes: int = 0, ) -> ArtifactRealizationReport: @@ -986,7 +1011,7 @@ def _runtime_attachment_report_for_artifact_id( reservation_bytes=reservation_bytes, ) envelope.validate_for_target(target_plan) - selection = resolve_artifact_selection( + selection = source_selection or resolve_artifact_selection( artifact_id=str(artifact_id), canonical_index_bytes=_canonical_index_bytes_from_tensors(tensors), tensor_names=tuple(str(name) for name in tensors), @@ -1057,6 +1082,21 @@ def _model_runtime_spec_for_context( ) +def _model_runtime_spec_with_context_defaults( + *, + spec: ArtifactRealizationSpec, + context: FrameworkIntegrationContext, + target_device: Any, +) -> ArtifactRealizationSpec: + facts = resolve_model_runtime_request_facts( + spec=spec, + runtime_context=RequestContext(target_device=target_device), + host_context=context, + host_target_device=target_device, + ) + 
return cast(ArtifactRealizationSpec, facts.spec) + + def _model_runtime_realization_handle( *, context: FrameworkIntegrationContext, @@ -1098,9 +1138,25 @@ def _model_runtime_realization_handle_for_spec( return handle +def _project_model_runtime_attachment( + state: RuntimeBindingState, + attachment: RuntimeAttachment, +) -> RuntimeAttachment: + handle = state.model_runtime_handle + if not isinstance(handle, ArtifactRealizationHandle): + return attachment + state.model_runtime_handle = ArtifactRealizationHandle( + target_kind="model_runtime", + report=handle.report, + attachment_value=attachment, + release_contract=handle.release_contract, + ) + return attachment + + @dataclass(frozen=True) class RuntimeBindingResult: - """Attach-ready result from a serving bind or swap operation.""" + """Attach-ready result from a runtime bind or swap operation.""" binding: Any tensors: Mapping[str, torch.Tensor] @@ -1130,125 +1186,6 @@ def from_binding( ) -@dataclass -class RestoredRetainedBinding: - """Restored retained binding tensors before runtime ownership transfer.""" - - _attached: tc_retained_binding.AttachedRetainedBinding - _runtime_handle: ( - tc_retained_binding.RuntimeRetainedBindingAttachmentHandle | None - ) = None - - @property - def tensors(self) -> Mapping[str, torch.Tensor]: - return self._attached.tensors - - @property - def binding_layout_id(self) -> str: - return self._attached.binding_layout_id - - @property - def binding_value_ref(self) -> tc.BindingValueRef: - return self._attached.binding_value_ref - - @property - def member_ref(self) -> tc.ServingBindingMemberRef: - return self._attached.member_ref - - @property - def reservation_bytes(self) -> int: - return self._attached.reservation_bytes - - @property - def authority(self) -> tc_retained_binding.ParsedRetainedServingBindingAuthority: - return self._attached.authority - - @property - def runtime_handle( - self, - ) -> tc_retained_binding.RuntimeRetainedBindingAttachmentHandle | None: - return 
self._runtime_handle - - def transfer_to_runtime( - self, - ) -> tc_retained_binding.RuntimeRetainedBindingAttachmentHandle: - if self._runtime_handle is None: - self._runtime_handle = self._attached.transfer_to_runtime() - return self._runtime_handle - - def close(self) -> None: - if self._runtime_handle is None: - self._attached.close() - - -@dataclass(frozen=True) -class SourceSubject: - """Opaque framework-facing source subject wrapper.""" - - artifact_ref: str - subject: Any - source_kind: str = "opaque" - metadata_fingerprint: str | None = None - - def broadcast_payload(self) -> dict[str, Any]: - if self.source_kind == "public_disk": - subject_payload = _public_disk_source_payload(self.subject) - else: - subject_payload = self.subject - return { - "kind": self.source_kind, - "artifact_ref": self.artifact_ref, - "subject": subject_payload, - "metadata_fingerprint": self.metadata_fingerprint, - } - - def profile_fields(self) -> dict[str, Any]: - source = self.subject - fields: dict[str, Any] = { - "artifact_ref": self.artifact_ref, - "source_kind": self.source_kind, - } - if self.metadata_fingerprint is not None: - fields["metadata_fingerprint"] = self.metadata_fingerprint - canonical_index = getattr(source, "canonical_index_bytes", None) - if canonical_index is not None: - fields["canonical_index_bytes"] = len(canonical_index) - source_index = getattr(source, "source_index_bytes", None) - if source_index is not None: - fields["source_index_bytes"] = len(bytes(source_index or b"")) - for name in ("format_kind", "metadata_capability"): - value = getattr(source, name, None) - if value is not None: - fields[name] = str(value or "") - return fields - - -def _public_disk_source_payload(source: Any) -> dict[str, Any]: - return { - "path": str(getattr(source, "path", "") or ""), - "canonical_index_bytes": bytes(source.canonical_index_bytes), - "artifact_id": str(getattr(source, "artifact_id", "") or ""), - "generation": int(getattr(source, "generation", 0) or 0), - 
"verify_checksums": bool(getattr(source, "verify_checksums", True)), - "trusted_content_artifact_id": _optional_str( - getattr(source, "trusted_content_artifact_id", None) - ), - "source_index_bytes": _optional_bytes( - getattr(source, "source_index_bytes", None) - ), - "format_kind": _enum_wire_value(getattr(source, "format_kind", None)), - "metadata_capability": _enum_wire_value( - getattr(source, "metadata_capability", None) - ), - "resolution_strategy": _enum_wire_value( - getattr(source, "resolution_strategy", None) - ), - "validation_mode": _enum_wire_value(getattr(source, "validation_mode", None)), - "policy_id": _optional_str(getattr(source, "policy_id", None)), - "exact_size_bytes": int(getattr(source, "exact_size_bytes", 0) or 0), - } - - def _optional_str(value: Any) -> str | None: if value is None: return None @@ -1308,110 +1245,6 @@ def _artifact_locator_kind(artifact_locator: object) -> str: return str(getattr(artifact_locator, "kind", "") or "") -def _optional_bool(fields: Mapping[str, object], name: str, default: bool) -> bool: - value = fields.get(name) - if value is None: - return default - return bool(value) - - -def _optional_path(value: object | None) -> Path | None: - if value is None: - return None - text = str(value).strip() - if not text: - return None - return Path(text).expanduser() - - -def _unique_paths(paths: Sequence[Path]) -> tuple[Path, ...]: - unique: list[Path] = [] - seen: set[str] = set() - for path in paths: - key = str(path) - if key in seen: - continue - seen.add(key) - unique.append(path) - return tuple(unique) - - -def _model_adjacent_cache_root(source_catalog: object) -> Path | None: - raw_selected_files = getattr(source_catalog, "selected_files", ()) or () - selected_files = tuple(cast(Sequence[Any], raw_selected_files)) - if not selected_files: - return None - parent_paths: list[str] = [] - for entry in selected_files: - path = getattr(entry, "path", None) - if path is None: - continue - 
parent_paths.append(str(Path(path).expanduser().resolve().parent)) - if not parent_paths: - return None - return Path(os.path.commonpath(parent_paths)) / ".tensorcast" / "bootstrap_cache" - - -def _is_writable_or_creatable(path: Path) -> bool: - if path.exists(): - return os.access(path, os.W_OK) - parent = path.parent - while not parent.exists() and parent != parent.parent: - parent = parent.parent - return parent.exists() and os.access(parent, os.W_OK) - - -def _recipe_build_cache_config_from_policy( - policy: RecipeCachePolicy, - *, - source_catalog: object, -) -> RecipeBuildCacheConfig: - fields = dict(policy.fields or {}) - explicit_cache_root = _optional_bool(fields, "explicit_cache_root", False) - prefer_model_adjacent = _optional_bool(fields, "prefer_model_adjacent", True) - cache_root = _optional_path(fields.get("cache_root")) - - roots: list[Path] = [] - if prefer_model_adjacent: - model_adjacent = _model_adjacent_cache_root(source_catalog) - if model_adjacent is not None: - roots.append(model_adjacent) - if cache_root is not None and (explicit_cache_root or not roots): - roots.append(cache_root) - roots = list(_unique_paths(roots)) - - write_roots: list[Path] = [] - if prefer_model_adjacent: - model_adjacent = _model_adjacent_cache_root(source_catalog) - if model_adjacent is not None and _is_writable_or_creatable(model_adjacent): - write_roots.append(model_adjacent) - if cache_root is not None and (explicit_cache_root or not write_roots): - write_roots.append(cache_root) - write_roots = list(_unique_paths(write_roots)) - - debug_output_dir = _optional_path(fields.get("debug_output_dir")) - return RecipeBuildCacheConfig( - cache_dirs=tuple(str(root / "trace_plans") for root in roots), - trace_write_dirs=tuple(str(root / "trace_plans") for root in write_roots), - recipe_cache_dirs=tuple(str(root / "compiled_recipes") for root in roots), - recipe_cache_write_dirs=tuple( - str(root / "compiled_recipes") for root in write_roots - ), - 
debug_output_dir=debug_output_dir, - allow_cache=_optional_bool(fields, "allow_cache", True), - allow_recipe_cache=_optional_bool(fields, "allow_recipe_cache", True), - allow_trace=_optional_bool(fields, "allow_trace", True), - trace_tp_slices=_optional_bool(fields, "trace_tp_slices", True), - debug_dump_trace=_optional_bool(fields, "debug_dump_trace", False), - synchronous_cache_write=_optional_bool( - fields, "synchronous_cache_write", False - ), - synchronous_recipe_cache_write=_optional_bool( - fields, "synchronous_recipe_cache_write", False - ), - ) - - def _collective_policy_value(policy: MaterializationPolicy) -> str: collective = str(policy.fields.get("collective", "auto") or "auto") return { @@ -1444,128 +1277,10 @@ def _framework_payload_mapping(payload: object | None) -> dict[str, object] | No return {str(key): value for key, value in payload.items()} -def _optional_bytes(value: Any) -> bytes | None: - if value is None: - return None - data = bytes(value) - return data or None - - -def _enum_wire_value(value: Any) -> str | int | None: - if value is None: - return None - enum_value = getattr(value, "value", value) - if isinstance(enum_value, (str, int)): - return enum_value - return str(enum_value) - - -def _source_subject_from_handle(source: Any) -> SourceSubject: - artifact_ref = str(getattr(source, "artifact_id", "") or "") - if not artifact_ref: - raise RuntimeError("TensorCast source subject is missing a source artifact_id") - return SourceSubject( - artifact_ref=artifact_ref, - subject=source, - source_kind="public_disk", - ) - - -def resolve_source_subject( - path: str, - *, - verify_checksums: bool, -) -> SourceSubject: - return _source_subject_from_handle( - tc.resolve_public_disk_source( - path, - verify_checksums=verify_checksums, - ) - ) - - -def source_subject_from_broadcast_payload(payload: Mapping[str, Any]) -> SourceSubject: - payload_dict = dict(payload) - if "kind" not in payload_dict: - raise SourceSubjectError( - "TensorCast source 
subject broadcast payload is missing kind" - ) - kind = str(payload_dict.get("kind") or "") - artifact_ref = str(payload_dict.get("artifact_ref") or "") - if not artifact_ref: - raise SourceSubjectError( - "TensorCast source subject broadcast payload is missing artifact_ref" - ) - source: Any - if kind == "public_disk": - subject_payload = payload_dict.get("subject") - if not isinstance(subject_payload, Mapping): - raise SourceSubjectError( - "TensorCast public_disk source subject payload must be a mapping" - ) - source = tc.PublicDiskSourceHandle(**dict(subject_payload)) - else: - source = payload_dict.get("subject") - return SourceSubject( - artifact_ref=artifact_ref, - subject=source, - source_kind=kind, - metadata_fingerprint=_optional_text(payload_dict.get("metadata_fingerprint")), - ) - - -def source_subject_broadcast_payload(subject: SourceSubject) -> dict[str, Any]: - return subject.broadcast_payload() - - -def is_public_disk_source_subject(subject: Any) -> bool: - return isinstance(subject, tc.PublicDiskSourceHandle) - - def source_subject_slice_count(recipe: Any, subject: Any) -> int: if is_public_disk_source_subject(subject): return 0 - return tensorcast_view_slice_count(recipe) - - -def serving_binding_state_from_runtime_view( - *, - runtime_view: RuntimeBindingView, - artifact_locator: Any, - policy: Any, - readiness: str | None = None, -) -> ServingBindingState: - binding_value_ref = runtime_view.binding_value_ref - to_ref = getattr(binding_value_ref, "to_binding_value_ref", None) - if callable(to_ref): - binding_value_ref = to_ref() - if binding_value_ref is not None and not isinstance( - binding_value_ref, - BindingValueRef, - ): - if isinstance(binding_value_ref, Mapping): - binding_value_ref = BindingValueRef.model_validate(dict(binding_value_ref)) - else: - raise ServingIntegrationError( - "RuntimeBindingView.binding_value_ref must be BindingValueRef or a mapping" - ) - typed_binding_value_ref = cast(BindingValueRef | None, binding_value_ref) - 
resolved_readiness = readiness or runtime_view.readiness or "loaded" - state = "loaded" if resolved_readiness == "serving" else resolved_readiness - return ServingBindingState( - state=state, - artifact_locator=artifact_locator, - serving_artifact_ref=runtime_view.serving_artifact_ref, - manifest_ref=getattr(policy, "manifest_ref", None), - representation_contract_hash=( - runtime_view.representation_contract_hash - or getattr(policy, "representation_contract_hash", "") - ), - serving_build_digest=getattr(policy, "serving_build_digest", None), - binding_value_ref=typed_binding_value_ref, - local_serving_ref=runtime_view.local_serving_ref, - readiness=resolved_readiness, - ) + return tc_local_ready.tensorcast_view_slice_count(recipe) def runtime_binding_state_from_runtime_view( @@ -1718,12 +1433,14 @@ def local_ready_current_value_summary_fields( ) -> dict[str, Any]: local_serving_ref = getattr(current_value, "local_serving_ref", None) if require_local_serving_ref and not local_serving_ref: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast local-ready current value did not include local_serving_ref" ) return { "binding_value_id": getattr(current_value, "binding_value_id", None), - "verification_state": binding_value_verification_state_name(current_value), + "verification_state": tc_local_ready.binding_value_verification_state_name( + current_value + ), "local_serving_ref": local_serving_ref, } @@ -1761,7 +1478,7 @@ def build_local_ready_prepared_artifact( source_bound_contract_path: str, artifact_realization_report: ArtifactRealizationReport | None = None, model_runtime_spec: ArtifactRealizationSpec | None = None, -) -> LocalReadyServingResult: +) -> LocalReadyRuntimeResult: current_value_fields = local_ready_current_value_summary_fields( current_value, require_local_serving_ref=True, @@ -1787,7 +1504,7 @@ def build_local_ready_prepared_artifact( ), prefix="realize", ) - realization_report = 
tc_diagnostics.ServingRealizationReport( + realization_report = tc_diagnostics.RuntimeRealizationReport( source_artifact_ref=source_artifact_ref, serving_manifest_ref=serving_manifest_ref, representation_contract_hash=representation_contract_hash, @@ -1828,7 +1545,7 @@ def build_local_ready_prepared_artifact( tensor_schema_hash=tensor_schema_hash, binding_value_ref=binding_value_ref, local_serving_ref=local_serving_ref, - readiness="serving_local_ready", + readiness="runtime_local_ready", diagnostics=diagnostics, ) runtime_state = runtime_binding_state_from_runtime_view( @@ -1838,14 +1555,14 @@ def build_local_ready_prepared_artifact( artifact_realization_report=artifact_realization_report, model_runtime_spec=model_runtime_spec, ) - prepared = PreparedServingArtifact( + prepared = PreparedRuntimeArtifact( source_artifact_ref=source_artifact_ref, serving_artifact_ref=None, serving_manifest_ref=serving_manifest_ref, representation_contract_hash=representation_contract_hash, serving_build_digest=serving_build_digest, binding_value_ref=binding_value_ref, - readiness="serving_local_ready", + readiness="runtime_local_ready", family=family, tensor_schema_hash=tensor_schema_hash, binding_layout_id=binding_layout_id, @@ -1855,7 +1572,7 @@ def build_local_ready_prepared_artifact( tp_rank=int(tp_rank), tp_world_size=int(tp_world_size), ) - return LocalReadyServingResult( + return LocalReadyRuntimeResult( runtime_state=runtime_state, runtime_view=runtime_view, prepared=prepared, @@ -1893,22 +1610,22 @@ def build_collective_group_id( @dataclass(frozen=True) -class ServingIntegration: - """Small service object for framework-facing serving lifecycle calls.""" +class ArtifactRuntimeIntegration: + """Small service object for framework-facing runtime lifecycle calls.""" - resolver: ServingArtifactResolver | None = None + resolver: RuntimeArtifactResolver | None = None profile_sink: Any | None = None host: IntegrationHost | None = None @staticmethod - def 
_lifecycle_not_implemented(method: str, phase: str) -> None: - raise ServingIntegrationNotImplementedError( - f"ServingIntegration.{method} request DTO is available, but the " + def _lifecycle_not_implemented(method: str, phase: str) -> NoReturn: + raise ArtifactRuntimeNotImplementedError( + f"ArtifactRuntimeIntegration.{method} request DTO is available, but the " f"deep core-owned lifecycle is scheduled for {phase}" ) - def resolve(self, artifact_ref: str, **kwargs: Any) -> ResolvedServingArtifact: - return resolve_serving_artifact( + def resolve(self, artifact_ref: str, **kwargs: Any) -> ResolvedRuntimeArtifact: + return resolve_runtime_artifact( artifact_ref, resolver=self.resolver, **kwargs, @@ -1919,10 +1636,12 @@ def read_manifest( artifact: Any, *, artifact_ref: str, - ) -> ResolvedServingArtifact: + ) -> ResolvedRuntimeArtifact: if self.resolver is None: - raise ValueError("ServingIntegration.read_manifest requires resolver") - return read_serving_artifact_manifest( + raise ValueError( + "ArtifactRuntimeIntegration.read_manifest requires resolver" + ) + return read_runtime_artifact_manifest( artifact, artifact_ref=artifact_ref, resolver=self.resolver, @@ -1930,12 +1649,12 @@ def read_manifest( def cross_check( self, - resolved_artifact: ResolvedServingArtifact, + resolved_artifact: ResolvedRuntimeArtifact, **kwargs: Any, - ) -> ResolvedServingArtifact: + ) -> ResolvedRuntimeArtifact: if self.resolver is None: - raise ValueError("ServingIntegration.cross_check requires resolver") - return cross_check_serving_artifact( + raise ValueError("ArtifactRuntimeIntegration.cross_check requires resolver") + return cross_check_runtime_artifact( resolved_artifact, resolver=self.resolver, **kwargs, @@ -1943,20 +1662,20 @@ def cross_check( def start( self, - intent: ServingIntent, + intent: RuntimeIntent, context: RequestContext, ) -> RuntimeAttachment: - """Start serving from a public intent DTO.""" + """Start runtime materialization from a public intent DTO.""" decision 
= self._admit_intent(intent, context) - if isinstance(intent, ExistingServingArtifact): + if isinstance(intent, ExistingRuntimeArtifact): self._reject_source_selector_for_existing_artifact(intent.artifact_locator) materialization_request = self._host_materialization_request( context, - operation_scope="startup.direct_serving_artifact.bind", + operation_scope="startup.direct_runtime_artifact.bind", ) - load_result = self._load_existing_serving_artifact( - _DirectServingLoad( + load_result = self._load_existing_runtime_artifact( + _DirectRuntimeLoad( artifact_locator=intent.artifact_locator, policy=intent.policy, framework_config=context.framework_config, @@ -1980,9 +1699,9 @@ def start( ) ) if load_result.model is None or load_result.runtime_state is None: - raise ServingIntegrationError( - "ServingIntegration.start returned no model/state for " - "ExistingServingArtifact" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start returned no model/state for " + "ExistingRuntimeArtifact" ) return self._attachment_from_load_result(load_result, decision) if isinstance(intent, RetainedBindingAcquire): @@ -1999,7 +1718,7 @@ def start( and placement.member != authority.member ): raise AuthorityValidationError( - "ParsedRetainedServingBindingAuthority.member does not match " + "ParsedRetainedRealizationAuthority.member does not match " "runtime placement", details={ "authority_member": repr(authority.member), @@ -2019,8 +1738,8 @@ def start( ) ) if retained_result.model is None or retained_result.runtime_state is None: - raise ServingIntegrationError( - "ServingIntegration.start returned no model/state for " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start returned no model/state for " "RetainedBindingAcquire" ) return self._attachment_from_retained_result(retained_result, decision) @@ -2036,8 +1755,8 @@ def start( local_ready_result.model is None or local_ready_result.runtime_state is None ): - raise ServingIntegrationError( 
- "ServingIntegration.start returned no model/state for " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start returned no model/state for " "LocalSourceBootstrap" ) self._run_local_ready_barrier(context) @@ -2045,25 +1764,241 @@ def start( local_ready_result, decision, ) - raise ServingIntegrationError( - f"Unsupported TensorCast serving intent: {type(intent).__name__}" + raise ArtifactRuntimeIntegrationError( + f"Unsupported TensorCast runtime intent: {type(intent).__name__}" + ) + + def _retained_expected_member( + self, + authority: ParsedRetainedRealizationAuthority, + context: RequestContext, + ) -> Any: + expected_member = authority.member + if self.host is None: + return expected_member + placement = self._framework_context( + context.framework_config, + context.model_config, + ).placement + if ( + placement is not None + and placement.member is not None + and placement.member != authority.member + ): + raise AuthorityValidationError( + "ParsedRetainedRealizationAuthority.member does not match " + "runtime placement", + details={ + "authority_member": repr(authority.member), + "placement_member": repr(placement.member), + }, + ) + if placement is not None and placement.member is not None: + return placement.member + return expected_member + + def realize_model_runtime( + self, + *, + artifact_ref: str, + spec: ArtifactRealizationSpec, + context: RequestContext, + source_selection: ResolvedArtifactSelection | None = None, + runtime_artifact_policy: Any | None = None, + materialization: Any | None = None, + ) -> RuntimeAttachment: + """Realize an artifact-rooted model runtime without a session.""" + + if spec.target_kind != "model_runtime": + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.realize_model_runtime requires a model_runtime spec" + ) + framework_context = self._framework_context( + context.framework_config, + context.model_config, + ) + facts = resolve_model_runtime_request_facts( + spec=spec, + 
runtime_context=context, + host_context=framework_context, + ) + spec = cast(ArtifactRealizationSpec, facts.spec) + context = cast(RequestContext, facts.context) + intent = ExistingRuntimeArtifact( + artifact_locator=str(artifact_ref), policy=runtime_artifact_policy + ) + decision = self._admit_intent(intent, context) + materialization_request = self._host_materialization_request( + context, + operation_scope="startup.direct_artifact_runtime.bind", + ) + result = self._load_existing_runtime_artifact( + _DirectRuntimeLoad( + artifact_ref=str(artifact_ref), + policy=runtime_artifact_policy, + materialization=materialization, + framework_config=context.framework_config, + model_config=context.model_config, + target_device=context.target_device, + timeout_s=context.timeout_s, + configured_collective_policy=( + materialization_request.configured_collective_policy + ), + source_selection=source_selection, + source_bound_contract_state=( + materialization_request.source_bound_contract_state + ), + source_bound_contract_path=( + materialization_request.source_bound_contract_path + ), + execution_facts=materialization_request.execution_facts, + operation_scope=materialization_request.operation_scope, + require_materialization_options=( + materialization_request.require_materialization_options + ), + model_runtime_spec=spec, + ) + ) + if result.model is None or result.runtime_state is None: + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.realize_model_runtime returned no model/state" + ) + return self._attachment_from_load_result(result, decision) + + def realize_retained_model_runtime( + self, + *, + authority: ParsedRetainedRealizationAuthority, + spec: ArtifactRealizationSpec, + context: RequestContext, + ) -> RuntimeAttachment: + """Realize a retained handoff for a model runtime without a session.""" + + if spec.target_kind != "model_runtime": + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.realize_retained_model_runtime 
requires " + "a model_runtime spec" + ) + framework_context = self._framework_context( + context.framework_config, + context.model_config, + ) + facts = resolve_model_runtime_request_facts( + spec=spec, + runtime_context=context, + host_context=framework_context, + ) + spec = cast(ArtifactRealizationSpec, facts.spec) + context = cast(RequestContext, facts.context) + intent = RetainedBindingAcquire(authority) + decision = self._admit_intent(intent, context) + retained_result = self._restore_retained_for_intent( + _RetainedBindingAcquire( + authority=authority, + framework_config=context.framework_config, + model_config=context.model_config, + target_device=context.target_device, + expected_member=self._retained_expected_member(authority, context), + timeout_s=context.timeout_s, + model_runtime_spec=spec, + ) + ) + if retained_result.model is None or retained_result.runtime_state is None: + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.realize_retained_model_runtime returned no " + "model/state" + ) + return self._attachment_from_retained_result(retained_result, decision) + + def realize_mounted_source_model_runtime( + self, + *, + artifact_ref: str, + source_subject: Any, + spec: ArtifactRealizationSpec, + context: RequestContext, + source_selection: ResolvedArtifactSelection | None = None, + source_selector: SourceSelector | None = None, + bootstrap_policy: Any | None = None, + materialization: Any | None = None, + ) -> RuntimeAttachment: + """Realize a daemon-attested mounted source as a model runtime.""" + + if spec.target_kind != "model_runtime": + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.realize_mounted_source_model_runtime " + "requires a model_runtime spec" + ) + framework_context = self._framework_context( + context.framework_config, + context.model_config, + ) + facts = resolve_model_runtime_request_facts( + spec=spec, + runtime_context=context, + host_context=framework_context, + ) + spec = 
cast(ArtifactRealizationSpec, facts.spec) + context = cast(RequestContext, facts.context) + source_artifact_ref = tc_source_catalog.resolve_source_artifact_ref( + str(artifact_ref) + ) + if not source_artifact_ref.startswith("msa1:"): + raise ArtifactRuntimeIntegrationError( + "mounted-source model_runtime realization requires an msa1 " + "mounted-source artifact" + ) + subject = self._source_subject_for_mounted_source( + source_artifact_ref=source_artifact_ref, + source_subject=source_subject, + ) + resolved_selector = source_selector or self._source_selector_for_subject( + subject + ) + intent = LocalSourceBootstrap( + source_selector=resolved_selector, + bootstrap_policy=bootstrap_policy or BootstrapPolicy(), + ) + decision = self._admit_intent(intent, context) + request = self._local_source_bootstrap_request( + intent, + context, + decision=decision, + model_runtime_spec=spec, ) + if materialization is not None: + request = replace(request, options=materialization) + local_ready_result = self._prepare_local_source_bootstrap( + replace( + request, + source_subject=subject, + source_artifact_ref=source_artifact_ref, + source_selection=source_selection, + ) + ) + if local_ready_result.model is None or local_ready_result.runtime_state is None: + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.realize_mounted_source_model_runtime " + "returned no model/state" + ) + self._run_local_ready_barrier(context) + return self._attachment_from_local_ready_result(local_ready_result, decision) def reload( self, current_state: RuntimeBindingState | Any, - intent: ExistingServingArtifact, + intent: ExistingRuntimeArtifact, context: RequestContext, *, model: object | None = None, contract_identity: str | None = None, ) -> RuntimeAttachment: - """Reload an existing runtime binding from a public serving intent.""" + """Reload an existing runtime binding from a public runtime intent.""" - if not isinstance(intent, ExistingServingArtifact): - raise 
ServingIntegrationError( - "ServingIntegration.reload currently accepts " - "ExistingServingArtifact intent only" + if not isinstance(intent, ExistingRuntimeArtifact): + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.reload currently accepts " + "ExistingRuntimeArtifact intent only" ) self._reject_source_selector_for_existing_artifact(intent.artifact_locator) decision = self._admit_intent(intent, context, reload=True) @@ -2071,8 +2006,8 @@ def reload( context, operation_scope="runtime_binding.swap", ) - result = self._reload_existing_serving_artifact( - _ServingReload( + result = self._reload_existing_runtime_artifact( + _RuntimeReload( current_state=current_state, artifact_locator=intent.artifact_locator, policy=intent.policy, @@ -2098,8 +2033,8 @@ def reload( ) ) if result.runtime_state is None: - raise ServingIntegrationError( - "ServingIntegration.reload returned no runtime state" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.reload returned no runtime state" ) runtime_model = ( model if model is not None else getattr(current_state, "model", None) @@ -2109,11 +2044,12 @@ def reload( decision=decision, include_reload_response=True, ) - return RuntimeAttachment( + attachment = RuntimeAttachment( model=runtime_model, state=result.runtime_state, view=view, ) + return _project_model_runtime_attachment(result.runtime_state, attachment) def describe(self, state: RuntimeBindingState | Any) -> RuntimeWorkerView: """Return the typed endpoint/worker projection for core runtime state.""" @@ -2124,7 +2060,7 @@ def describe(self, state: RuntimeBindingState | Any) -> RuntimeWorkerView: def _admit_intent( self, - intent: ServingIntent, + intent: RuntimeIntent, context: RequestContext, *, reload: bool = False, @@ -2132,8 +2068,8 @@ def _admit_intent( if self.host is None: return None if context.model_config is None: - raise ServingIntegrationError( - "ServingIntegration host admission requires model_config" + raise 
ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration host admission requires model_config" ) framework_identity = self.host.framework.identity(context.model_config) placement_identity = self.host.placement.identity_facts( @@ -2169,7 +2105,7 @@ def _admit_intent( allowed = decision.startup_allowed action = "startup" if not allowed: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast admission rejected " f"{action}: family={decision.family!r}, " f"support_level={decision.support_level!r}" @@ -2181,31 +2117,32 @@ def _reject_source_selector_for_existing_artifact( artifact_locator: object, ) -> None: if isinstance(artifact_locator, SourceSelector): - raise ServingIntegrationError( - "ExistingServingArtifact requires a durable serving artifact " + raise ArtifactRuntimeIntegrationError( + "ExistingRuntimeArtifact requires a durable runtime artifact " "locator; local source selectors must use LocalSourceBootstrap" ) if _artifact_locator_kind(artifact_locator) == "local_path": - raise ServingIntegrationError( - "ExistingServingArtifact rejects local_path artifact locators; use " + raise ArtifactRuntimeIntegrationError( + "ExistingRuntimeArtifact rejects local_path artifact locators; use " "LocalSourceBootstrap for local source acquisition" ) def _attachment_from_load_result( self, - result: ServingLoadResult, + result: RuntimeLoadResult, decision: AdmissionDecision | None, ) -> RuntimeAttachment: state = result.runtime_state if state is None or result.model is None: - raise ServingIntegrationError( - "ServingLoadResult is missing model or runtime_state" + raise ArtifactRuntimeIntegrationError( + "RuntimeLoadResult is missing model or runtime_state" ) - return RuntimeAttachment( + attachment = RuntimeAttachment( model=result.model, state=state, view=self._worker_view_from_state(state, decision=decision), ) + return _project_model_runtime_attachment(state, attachment) def _attachment_from_retained_result( self, @@ -2214,32 +2151,34 
@@ def _attachment_from_retained_result( ) -> RuntimeAttachment: state = result.runtime_state if state is None or result.model is None: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "RetainedBindingResult is missing model or runtime_state" ) - return RuntimeAttachment( + attachment = RuntimeAttachment( model=result.model, state=state, view=self._worker_view_from_state(state, decision=decision), ) + return _project_model_runtime_attachment(state, attachment) def _attachment_from_local_ready_result( self, - result: LocalReadyServingResult, + result: LocalReadyRuntimeResult, decision: AdmissionDecision | None, ) -> RuntimeAttachment: state = result.runtime_state if state is None or result.model is None: - raise ServingIntegrationError( - "LocalReadyServingResult is missing model or runtime_state" + raise ArtifactRuntimeIntegrationError( + "LocalReadyRuntimeResult is missing model or runtime_state" ) - return RuntimeAttachment( + attachment = RuntimeAttachment( model=result.model, state=state, view=self._worker_view_from_state(state, decision=decision), prepared=result.prepared, recipe=result.recipe, ) + return _project_model_runtime_attachment(state, attachment) def _local_source_bootstrap_request( self, @@ -2247,22 +2186,23 @@ def _local_source_bootstrap_request( context: RequestContext, *, decision: AdmissionDecision | None, + model_runtime_spec: ArtifactRealizationSpec | None = None, ) -> _LocalReadyBootstrap: if self.host is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "IntegrationHost" ) if context.model_config is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires model_config" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires model_config" ) profile = 
self.host.runtime_profile or RuntimeProfile() identity = self.host.framework.identity(context.model_config) placement_identity = self.host.placement.identity_facts( context.framework_config ) - placement = self._host_serving_placement(context.framework_config) + placement = self._host_runtime_placement(context.framework_config) recipe = getattr(intent, "recipe", None) model = getattr(intent, "model", None) coordinator = getattr(intent, "coordinator", None) @@ -2336,7 +2276,49 @@ def _local_source_bootstrap_request( require_materialization_options=( materialization_request.require_materialization_options ), + model_runtime_spec=model_runtime_spec, + ) + + @staticmethod + def _source_subject_for_mounted_source( + *, + source_artifact_ref: str, + source_subject: Any, + ) -> SourceSubject: + if isinstance(source_subject, SourceSubject): + subject_ref = tc_source_catalog.resolve_source_artifact_ref( + source_subject.artifact_ref + ) + if subject_ref != source_artifact_ref: + raise ArtifactRuntimeIntegrationError( + "mounted-source subject artifact_ref does not match " + "realization artifact_ref" + ) + return source_subject + subject_artifact_ref = str(getattr(source_subject, "artifact_id", "") or "") + if subject_artifact_ref and subject_artifact_ref != source_artifact_ref: + raise ArtifactRuntimeIntegrationError( + "mounted-source handle artifact_id does not match realization " + "artifact_ref" + ) + source_kind = ( + "public_disk" if is_public_disk_source_subject(source_subject) else "opaque" ) + return SourceSubject( + artifact_ref=source_artifact_ref, + subject=source_subject, + source_kind=source_kind, + ) + + @staticmethod + def _source_selector_for_subject(subject: SourceSubject) -> SourceSelector: + source_path = getattr(subject.subject, "path", None) + if source_path is None or not str(source_path).strip(): + raise ArtifactRuntimeIntegrationError( + "mounted-source model_runtime realization requires a source " + "selector or a source subject with a path" + ) + 
return SourceSelector.local_path(str(source_path)) def _host_source_subject_coordinator( self, @@ -2370,7 +2352,7 @@ def _host_recipe_cache_policy( model_config, ) if policy is not None and not isinstance(policy, RecipeCachePolicy): - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "IntegrationHost.source.recipe_cache_policy must return " "RecipeCachePolicy or None" ) @@ -2393,8 +2375,8 @@ def _worker_view_from_state( ) -> RuntimeWorkerView: runtime_view = getattr(state, "runtime_view", None) if runtime_view is None: - raise ServingIntegrationError( - "ServingIntegration.describe requires state.runtime_view" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.describe requires state.runtime_view" ) endpoint_fields = dict(decision.endpoint_fields) if decision else {} return RuntimeWorkerView.from_runtime_view( @@ -2432,13 +2414,13 @@ def _host_materialization_request( require_materialization_options=True, ) - def _host_serving_placement( + def _host_runtime_placement( self, framework_config: object | None, - ) -> ServingPlacement: + ) -> RuntimePlacement: if self.host is None: - raise ServingIntegrationError( - "ServingIntegration host placement requires IntegrationHost" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration host placement requires IntegrationHost" ) framework_payload = None framework_payload_fn = getattr(self.host.placement, "framework_payload", None) @@ -2452,7 +2434,7 @@ def _host_serving_placement( identity_payload = _framework_payload_mapping( identity_payload_fn(framework_config) ) - return serving_placement_from_framework_facts( + return runtime_placement_from_framework_facts( identity_facts=self.host.placement.identity_facts(framework_config), admission_facts=self.host.placement.admission_facts(framework_config), member_facts=self.host.placement.member_facts(framework_config), @@ -2471,20 +2453,20 @@ def _serving_artifact_schema_version(profile: RuntimeProfile) -> int: 
"serving_artifact_schema_version", None ) if value is None: - model_fields = getattr(ServingArtifactManifest, "model_fields", {}) + model_fields = getattr(RuntimeArtifactManifest, "model_fields", {}) schema_field = model_fields.get("schema_version") value = getattr(schema_field, "default", 1) return _optional_int(value) or 1 - def _load_existing_serving_artifact( - self, request: _DirectServingLoad - ) -> ServingLoadResult: + def _load_existing_runtime_artifact( + self, request: _DirectRuntimeLoad + ) -> RuntimeLoadResult: target_device = self._require_target_device(request.target_device) context = self._framework_context( request.framework_config, request.model_config, ) - preflight = self._preflight_serving_artifact( + preflight = self._preflight_runtime_artifact( resolved_artifact=request.resolved_artifact, artifact_ref=request.artifact_ref, artifact_locator=request.artifact_locator, @@ -2493,7 +2475,7 @@ def _load_existing_serving_artifact( placement=context.placement, ) resolved = preflight.resolved_artifact - policy = preflight.serving_runtime_policy + policy = preflight.runtime_artifact_policy model = request.model if model is None: self._prepare_model_construction( @@ -2506,7 +2488,7 @@ def _load_existing_serving_artifact( ) self._assert_model_ready_for_runtime_binding( model, - context="TensorCast direct serving artifact startup", + context="TensorCast direct runtime artifact startup", ) self._align_runtime_tensor_names( model, @@ -2524,7 +2506,7 @@ def _load_existing_serving_artifact( current_tensors, remove_duplicate=False, ) - preflight = self._preflight_serving_artifact( + preflight = self._preflight_runtime_artifact( resolved_artifact=resolved, artifact_ref=request.artifact_ref, artifact_locator=request.artifact_locator, @@ -2533,7 +2515,7 @@ def _load_existing_serving_artifact( placement=context.placement, ) resolved = preflight.resolved_artifact - policy = preflight.serving_runtime_policy + policy = preflight.runtime_artifact_policy manifest = 
getattr(resolved, "manifest", None) local_serving_ref = getattr(manifest, "local_serving_ref", None) if local_serving_ref: @@ -2542,7 +2524,7 @@ def _load_existing_serving_artifact( expected_member = context.placement.member if expected_member is None: raise RestoreBindingError( - "ServingIntegration._load_existing_serving_artifact prepared " + "ArtifactRuntimeIntegration._load_existing_runtime_artifact prepared " "local-ready restore requires expected_member" ) with restore_prepared_local_ready_binding( @@ -2567,6 +2549,7 @@ def _load_existing_serving_artifact( tensor_schema_hash=tensor_schema_hash, artifact_profile="retained_binding", authority_scope="daemon_retained_runtime_attachment", + source_selection=request.source_selection, retained=True, reservation_bytes=int(restored.reservation_bytes), ) @@ -2578,6 +2561,7 @@ def _load_existing_serving_artifact( target_device=target_device, tensor_schema_hash=tensor_schema_hash, reservation_bytes=restored.reservation_bytes, + source_selection=request.source_selection, ) state_seed = self._state_seed( resolved, @@ -2588,7 +2572,7 @@ def _load_existing_serving_artifact( ), binding_handle=restored, artifact_realization_report=artifact_report, - readiness="serving_local_ready", + readiness="runtime_local_ready", ) runtime_state = self._materializer().attach_and_finalize( model=model, @@ -2599,17 +2583,18 @@ def _load_existing_serving_artifact( replace_meta_params=True, target_device=target_device, model_config=request.model_config, + model_runtime_spec=request.model_runtime_spec, ) else: materialization = self._load_materialization_options( request, resolved, ) - binding_result = bind_serving_artifact( + binding_result = bind_runtime_artifact( resolved_artifact=resolved, tensor_names=tuple(current_tensors.keys()), device=target_device, - serving_runtime_policy=policy, + runtime_artifact_policy=policy, options=materialization, ) artifact_report = _runtime_attachment_report_for_resolved( @@ -2618,6 +2603,7 @@ def 
_load_existing_serving_artifact( binding_handle=binding_result.binding, target_device=target_device, tensor_schema_hash=tensor_schema_hash, + source_selection=request.source_selection, execution_diagnostics=binding_result.execution_diagnostics, materialization_diagnostics=binding_result.materialization_diagnostics, ) @@ -2638,8 +2624,9 @@ def _load_existing_serving_artifact( replace_meta_params=True, target_device=target_device, model_config=request.model_config, + model_runtime_spec=request.model_runtime_spec, ) - return ServingLoadResult( + return RuntimeLoadResult( model=model, runtime_state=runtime_state, runtime_view=runtime_state.runtime_view, @@ -2647,9 +2634,9 @@ def _load_existing_serving_artifact( binding_result=binding_result, ) - def _reload_existing_serving_artifact( - self, request: _ServingReload - ) -> ServingReloadResult: + def _reload_existing_runtime_artifact( + self, request: _RuntimeReload + ) -> RuntimeReloadResult: target_device = ( torch.device(request.target_device) if request.target_device is not None @@ -2657,13 +2644,13 @@ def _reload_existing_serving_artifact( ) binding = getattr(request.current_state, "binding", None) if binding is None: - raise ServingIntegrationError( - "ServingIntegration._reload_existing_serving_artifact requires current_state.binding" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires current_state.binding" ) if not is_runtime_binding_swap_capable(binding): - raise ServingIntegrationError( - "ServingIntegration._reload_existing_serving_artifact requires a " - "swap-capable serving binding" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires a " + "swap-capable runtime binding" ) current_view = getattr(request.current_state, "runtime_view", None) expected_tensor_schema_hash = getattr(current_view, "tensor_schema_hash", None) @@ -2692,7 +2679,7 @@ def _reload_existing_serving_artifact( 
request.model_config, ) placement = None if context is None else context.placement - preflight = self._preflight_serving_artifact( + preflight = self._preflight_runtime_artifact( resolved_artifact=request.resolved_artifact, artifact_ref=request.artifact_ref, artifact_locator=request.artifact_locator, @@ -2701,18 +2688,18 @@ def _reload_existing_serving_artifact( placement=placement, ) resolved = preflight.resolved_artifact - policy = preflight.serving_runtime_policy + policy = preflight.runtime_artifact_policy materialization = self._reload_materialization_options( request, resolved, ) - binding_result = swap_serving_artifact( + binding_result = swap_runtime_artifact( binding=binding, resolved_artifact=resolved, tensor_names=( None if runtime_tensors is None else tuple(runtime_tensors.keys()) ), - serving_runtime_policy=policy, + runtime_artifact_policy=policy, options=materialization, ) artifact_report = _runtime_attachment_report_for_resolved( @@ -2769,7 +2756,7 @@ def _reload_existing_serving_artifact( else realization_handle.release_contract, realization_handle=realization_handle, ) - return ServingReloadResult( + return RuntimeReloadResult( runtime_state=runtime_state, runtime_view=runtime_state.runtime_view, resolved_artifact=resolved, @@ -2783,27 +2770,12 @@ def _restore_retained_for_intent( authority = request.authority if authority is None: raise RestoreBindingError( - "ServingIntegration._restore_retained_for_intent requires authority" - ) - readiness = getattr(authority, "readiness", None) - if readiness == "serving_reserved": - raise RestoreBindingError( - "TensorCast retained acquire readiness='serving_reserved' " - "is not attachable" - ) - if readiness in { - "serving_group_prepared", - "serving_group_published_ready", - }: - raise RestoreBindingError( - "TensorCast retained acquire group readiness requires a " - "published group-realization transaction authority" - ) - if readiness == "serving_published_ready": - raise RestoreBindingError( - 
"TensorCast retained acquire readiness='serving_published_ready' " - "requires a swap-capable serving binding handle" + "ArtifactRuntimeIntegration._restore_retained_for_intent requires " + "authority" ) + rejection_reason = runtime_restore_rejection_reason(authority) + if rejection_reason is not None: + raise RestoreBindingError(rejection_reason) model = self._build_meta_model( request.framework_config, request.model_config, @@ -2844,7 +2816,7 @@ def _restore_retained_for_intent( binding_value_ref=restored.binding_value_ref, local_serving_ref=getattr(authority, "local_serving_ref", None), readiness=str( - getattr(authority, "readiness", "") or "serving_local_ready" + getattr(authority, "readiness", "") or "runtime_local_ready" ), diagnostics={ "reservation_bytes": int(restored.reservation_bytes), @@ -2871,6 +2843,7 @@ def _restore_retained_for_intent( model_config=request.model_config, run_process_after_load=False, expected_tensor_schema_hash=expected_tensor_schema_hash, + model_runtime_spec=request.model_runtime_spec, ) return RetainedBindingResult( model=model, @@ -2887,7 +2860,7 @@ def _restore_retained_for_intent( def _prepare_local_source_bootstrap( self, request: _LocalReadyBootstrap - ) -> LocalReadyServingResult: + ) -> LocalReadyRuntimeResult: if ( request.recipe is None or request.source_subject is None ) and request.build_recipe_from_framework_context: @@ -2895,18 +2868,18 @@ def _prepare_local_source_bootstrap( if request.recipe is None or request.source_subject is None: self._lifecycle_not_implemented("_prepare_local_source_bootstrap", "P5") if request.target_device is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires target_device" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires target_device" ) if not request.manifest_tensor_name: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires manifest_tensor_name" 
+ raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires manifest_tensor_name" ) model = request.model if request.build_model_from_framework_context and model is None: if request.model_config is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "model_config to build a framework model" ) model = self._build_meta_model( @@ -2923,24 +2896,24 @@ def _prepare_local_source_bootstrap( or not serving_build_digest ): if request.model_config is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "model_config to build a local-ready manifest carrier" ) if request.placement is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "placement to build a local-ready manifest carrier" ) if request.runtime_binding_schema_version is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "runtime_binding_schema_version to build a local-ready " "manifest carrier" ) if request.serving_artifact_schema_version is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "serving_artifact_schema_version to build a local-ready " "manifest carrier" ) @@ -2993,7 +2966,7 @@ def _prepare_local_source_bootstrap( ) if options is None: options = 
self._local_ready_materialization_options(request) - realization = prepare_local_ready_serving( + realization = tc_local_ready.realize_local_ready_binding_from_source( recipe=request.recipe, source_subject=request.source_subject, target_device=torch.device(request.target_device), @@ -3002,7 +2975,7 @@ def _prepare_local_source_bootstrap( options=options, binding_factory=request.binding_factory, ) - realized = LocalReadyServingResult( + realized = LocalReadyRuntimeResult( recipe=request.recipe, binding=realization.binding, update_epoch=realization.update_epoch, @@ -3032,6 +3005,7 @@ def _prepare_local_source_bootstrap( binding=realization.binding, update_epoch=realization.update_epoch, source_artifact_ref=str(request.source_artifact_ref), + source_selection=request.source_selection, serving_manifest_ref=str(serving_manifest_ref), representation_contract_hash=str(representation_contract_hash), serving_build_digest=str(serving_build_digest), @@ -3058,9 +3032,10 @@ def _prepare_local_source_bootstrap( framework_version=request.framework_version, adapter_version=request.adapter_version, serving_abi_version=request.serving_abi_version, + model_runtime_spec=request.model_runtime_spec, ) ) - return LocalReadyServingResult( + return LocalReadyRuntimeResult( model=finalized.model, runtime_state=finalized.runtime_state, runtime_view=finalized.runtime_view, @@ -3088,8 +3063,8 @@ def _local_ready_prepare_with_built_recipe( source_subject_record, "artifact_ref", None ) if not source_artifact_ref: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) could not " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) could not " "derive source_artifact_ref from source subject" ) try: @@ -3097,8 +3072,8 @@ def _local_ready_prepare_with_built_recipe( source_artifact_ref ) except ValueError as exc: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise 
ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "a real source artifact identity" ) from exc source_realization_subject = getattr( @@ -3142,8 +3117,8 @@ def _resolve_local_ready_source_subject( request: _LocalReadyBootstrap, ) -> SourceSubject: if request.source_selector is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "source_selector when source_subject is not supplied" ) verify_checksums = bool( @@ -3167,8 +3142,8 @@ def _local_ready_source_catalog( source_artifact_ref ) except ValueError as exc: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "a real source artifact identity" ) from exc if request.source_catalog is not None: @@ -3179,11 +3154,11 @@ def _local_ready_source_catalog( return request.source_catalog if self.host is not None and self.host.source_catalog is not None: if not isinstance(request.source_selector, SourceSelector): - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "IntegrationHost.source_catalog requires a core SourceSelector" ) if request.model_config is None: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "IntegrationHost.source_catalog requires model_config" ) source_catalog = self.host.source_catalog.build_catalog( @@ -3217,7 +3192,7 @@ def _local_ready_source_catalog( ) return source_catalog raise _capability_missing( - "ServingIntegration.start(LocalSourceBootstrap) requires " + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "IntegrationHost.source_catalog when recipe is not supplied", level="level2-local-bootstrap", capability="source_catalog", @@ -3237,7 +3212,7 @@ def 
_validate_source_catalog_artifact_ref( ) -> None: catalog_artifact_ref = getattr(source_catalog, "source_artifact_ref", None) if catalog_artifact_ref is None: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "SourceCatalogProvider returned a catalog without a real " "source_artifact_ref" ) @@ -3246,12 +3221,12 @@ def _validate_source_catalog_artifact_ref( str(catalog_artifact_ref) ) except ValueError as exc: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "SourceCatalogProvider returned a catalog without a real " "source_artifact_ref" ) from exc if catalog_source_ref != expected_source_artifact_ref: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "SourceCatalogProvider returned source_artifact_ref " f"{catalog_source_ref!r}, expected {expected_source_artifact_ref!r}" ) @@ -3266,7 +3241,7 @@ def _local_ready_recipe_cache_config( if callable(cache_config_factory): return cache_config_factory(source_catalog=source_catalog) if isinstance(request.cache_config, RecipeCachePolicy): - return _recipe_build_cache_config_from_policy( + return recipe_build_cache_config_from_policy( request.cache_config, source_catalog=source_catalog, ) @@ -3285,8 +3260,8 @@ def _build_local_ready_recipe_from_framework_context( placement: Any | None, ) -> Any: if request.model_config is None: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "model_config when recipe is not supplied" ) adapter = self._recipe_framework_adapter(request.model_config) @@ -3309,7 +3284,7 @@ def _build_local_ready_recipe_from_framework_context( request.model_config, ), cache_config=cache_config, - is_reserved_serving_tensor_name=is_reserved_serving_tensor_name, + is_reserved_runtime_tensor_name=is_reserved_runtime_tensor_name, semantic_validation_spec=request.semantic_validation_spec, 
placement=placement, debug_extra={ @@ -3352,8 +3327,8 @@ def _local_ready_materialization_options( or execution_facts is None ): if request.require_materialization_options: - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "materialization execution context" ) return None @@ -3362,8 +3337,8 @@ def _local_ready_materialization_options( "source_bound_contract_ready", False, ): - raise ServingIntegrationError( - "ServingIntegration.start(LocalSourceBootstrap) requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration.start(LocalSourceBootstrap) requires " "ready source-bound contract state" ) identity = self.local_ready_materialization_identity(request.recipe) @@ -3399,7 +3374,7 @@ def _assert_local_ready_contract_realizable( context: str, ) -> None: if contract.realization_entry_count <= 0: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( f"{context} requires a non-empty BindingRealizationPlan" ) if not contract.fallback_copy_plan: @@ -3410,7 +3385,7 @@ def _assert_local_ready_contract_realizable( ) if len(contract.fallback_copy_plan) > 8: unsupported = f"{unsupported}, ..." if unsupported else "..." 
- raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( f"{context} requires a fully representable BindingRealizationPlan; " f"unsupported_entries={len(contract.fallback_copy_plan)} " f"[{unsupported}]" @@ -3418,27 +3393,27 @@ def _assert_local_ready_contract_realizable( def _finalize_local_ready_runtime( self, request: _LocalReadyFinalize - ) -> LocalReadyServingResult: + ) -> LocalReadyRuntimeResult: target_device = self._require_target_device(request.target_device) if request.recipe is None: - raise ServingIntegrationError( - "ServingIntegration._finalize_local_ready_runtime requires recipe" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires recipe" ) if request.model is None: - raise ServingIntegrationError( - "ServingIntegration._finalize_local_ready_runtime requires model" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires model" ) if request.binding is None: - raise ServingIntegrationError( - "ServingIntegration._finalize_local_ready_runtime requires binding" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires binding" ) if request.update_epoch is None: - raise ServingIntegrationError( - "ServingIntegration._finalize_local_ready_runtime requires update_epoch" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires update_epoch" ) if not request.manifest_tensor_name: - raise ServingIntegrationError( - "ServingIntegration._finalize_local_ready_runtime requires manifest_tensor_name" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._finalize_local_ready_runtime requires manifest_tensor_name" ) try: framework_context = self._framework_context( @@ -3478,7 +3453,7 @@ def _finalize_local_ready_runtime( request.representation_contract_hash ), tensor_schema_hash=tensor_schema_hash, - 
readiness="serving_local_ready", + readiness="runtime_local_ready", ), replace_meta_params=bool(request.replace_meta_params), target_device=target_device, @@ -3500,14 +3475,26 @@ def _finalize_local_ready_runtime( update_epoch=request.update_epoch, source_artifact_ref=str(request.source_artifact_ref), ) + source_ref = str(request.source_artifact_ref) + artifact_profile = ( + "mounted_source" + if source_ref.startswith("msa1:") + else "local_ready_source_artifact" + ) + authority_scope = ( + "daemon_local_mounted_source" + if source_ref.startswith("msa1:") + else "daemon_mediated_local_ready_runtime_attachment" + ) artifact_report = _runtime_attachment_report_for_artifact_id( - artifact_id=str(request.source_artifact_ref), + artifact_id=source_ref, tensors=_binding_tensors(request.binding), binding_handle=request.binding, target_device=target_device, tensor_schema_hash=tensor_schema_hash, - artifact_profile="local_ready_source_artifact", - authority_scope="daemon_mediated_local_ready_runtime_attachment", + artifact_profile=artifact_profile, + authority_scope=authority_scope, + source_selection=request.source_selection, ) prepared = build_local_ready_prepared_artifact( source_artifact_ref=str(request.source_artifact_ref), @@ -3523,12 +3510,20 @@ def _finalize_local_ready_runtime( source_bound_contract_state=request.source_bound_contract_state, source_bound_contract_path=str(request.source_bound_contract_path), artifact_realization_report=artifact_report, - model_runtime_spec=_model_runtime_spec_for_context( - context=framework_context, - target_device=target_device, + model_runtime_spec=( + _model_runtime_spec_with_context_defaults( + spec=request.model_runtime_spec, + context=framework_context, + target_device=target_device, + ) + if request.model_runtime_spec is not None + else _model_runtime_spec_for_context( + context=framework_context, + target_device=target_device, + ) ), ) - return LocalReadyServingResult( + return LocalReadyRuntimeResult( model=request.model, 
runtime_state=prepared.runtime_state, runtime_view=prepared.runtime_view, @@ -3553,12 +3548,12 @@ def _assert_local_ready_finalize_admitted( if not self.local_ready_requires_binding_finalize(request.recipe): return if not request.run_process_after_load: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast representation-changing local-ready finalize " "requires process_after_load execution" ) if not request.run_semantic_validation: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast representation-changing local-ready finalize " "requires explicit semantic validation" ) @@ -3566,12 +3561,12 @@ def _assert_local_ready_finalize_admitted( semantic_validation_spec is None or getattr(semantic_validation_spec, "kind", "none") == "none" ): - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast representation-changing local-ready finalize " "requires an explicit semantic validation spec" ) if not request.validate_representation_contract_hash: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast representation-changing local-ready finalize " "requires representation contract validation" ) @@ -3579,7 +3574,7 @@ def _assert_local_ready_finalize_admitted( request.source_bound_contract_state is None or not request.source_bound_contract_path ): - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast representation-changing local-ready finalize " "requires same-binding contract proof" ) @@ -3588,7 +3583,7 @@ def _assert_local_ready_finalize_admitted( "source_bound_contract_ready", False, ): - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast representation-changing local-ready finalize " "requires ready same-binding contract proof" ) @@ -3612,23 +3607,23 @@ def _validate_local_ready_representation_contract_hash( if not request.validate_representation_contract_hash: return if 
request.model_config is None: - raise ServingIntegrationError( - "ServingIntegration local-ready representation validation " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration local-ready representation validation " "requires model_config" ) if request.placement is None: - raise ServingIntegrationError( - "ServingIntegration local-ready representation validation " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration local-ready representation validation " "requires placement" ) if request.runtime_binding_schema_version is None: - raise ServingIntegrationError( - "ServingIntegration local-ready representation validation " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration local-ready representation validation " "requires runtime_binding_schema_version" ) if request.serving_artifact_schema_version is None: - raise ServingIntegrationError( - "ServingIntegration local-ready representation validation " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration local-ready representation validation " "requires serving_artifact_schema_version" ) actual = self.local_ready_representation_contract_hash( @@ -3661,7 +3656,7 @@ def build_local_ready_manifest_carrier( logical_topology_json_payload: str | None = None, topology_admission_digest: str | None = None, ) -> tuple[str, bytes]: - return prepare_same_binding_manifest_carrier( + return tc_local_ready.prepare_same_binding_manifest_carrier( recipe, manifest_tensor_name=manifest_tensor_name, representation_contract_hash=representation_contract_hash, @@ -3678,18 +3673,20 @@ def build_local_ready_manifest_carrier_from_contract( topology: Any | None = None, framework_payload: Mapping[str, Any] | None = None, ) -> tuple[str, bytes]: - base_canonical_index = canonical_index_from_recipe(recipe) - tensor_schema_hash = compute_serving_tensor_schema_hash( + base_canonical_index = tc_local_ready.canonical_index_from_recipe(recipe) + tensor_schema_hash = 
tc_contract.compute_canonical_runtime_tensor_schema_hash( base_canonical_index, manifest_tensor_name=manifest_tensor_name, ) representation_contract_hash = representation_contract_hash_factory( tensor_schema_hash ) - logical_topology_json_payload = logical_topology_json_from_recipe( - recipe, - topology=topology, - framework_payload=dict(framework_payload or {}), + logical_topology_json_payload = ( + tc_local_ready.logical_topology_json_from_recipe( + recipe, + topology=topology, + framework_payload=dict(framework_payload or {}), + ) ) topology_admission_digest = _optional_text( getattr(topology, "schema_topology_digest", None) @@ -3738,10 +3735,17 @@ def local_ready_representation_contract_hash( "serving_artifact_schema_version": int(serving_artifact_schema_version), "placement": placement_identity, } - return compute_runtime_representation_contract_hash( + topology_ref = getattr(placement, "topology", None) + member_ref = getattr(placement, "member", None) + if topology_ref is None or member_ref is None: + raise ArtifactRuntimeIntegrationError( + "TensorCast local-ready manifest carrier requires placement " + "topology and member identity" + ) + return tc_contract.compute_runtime_representation_contract_hash( tensor_schema_hash=str(tensor_schema_hash or ""), - topology_ref=getattr(placement, "topology", None), - member_ref=getattr(placement, "member", None), + topology_ref=topology_ref, + member_ref=member_ref, framework_name=framework_name or self._framework_identity(model_config).framework_name, framework_version=framework_version @@ -3815,7 +3819,7 @@ def prepare_local_ready_manifest_carrier_from_framework_context( serving_abi_version=serving_abi_version, ) ) - manifest = ServingArtifactManifest.from_bytes(manifest_bytes) + manifest = RuntimeArtifactManifest.from_bytes(manifest_bytes) return LocalReadyManifestCarrierResult( representation_contract_hash=representation_contract_hash, manifest_bytes=manifest_bytes, @@ -3830,7 +3834,7 @@ def 
local_ready_tensor_schema_hash( manifest_tensor_name: str, manifest_bytes: bytes | None = None, ) -> str: - return compute_serving_binding_tensor_schema_hash( + return tc_local_ready.compute_runtime_binding_tensor_schema_hash( recipe, manifest_tensor_name=manifest_tensor_name, manifest_bytes=manifest_bytes, @@ -3840,7 +3844,10 @@ def local_ready_materialized_tensor_names( self, recipe: Any, ) -> tuple[str, ...]: - return tuple(str(entry.name) for entry in materialized_tensor_schema(recipe)) + return tuple( + str(entry.name) + for entry in tc_local_ready.materialized_tensor_schema(recipe) + ) def _assert_local_ready_binding_tensor_set( self, @@ -3880,18 +3887,20 @@ def build_local_ready_binding_contract( realization_plan_proto = bytes( getattr(recipe, "realization_plan_proto", b"") or b"" ) - realization_entry_count = compiled_recipe_realization_plan_count(recipe) + realization_entry_count = tc_local_ready.compiled_recipe_realization_plan_count( + recipe + ) if realization_entry_count <= 0: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast local-ready binding contract requires a compiled " "recipe with a pre-lowered BindingRealizationPlan" ) if not realization_plan_proto: - raise ServingIntegrationError( + raise ArtifactRuntimeIntegrationError( "TensorCast local-ready binding contract requires compiled " "recipe realization_plan_proto; regenerate the compiled recipe cache" ) - validate_tensor_schema_against_tensors( + tc_tensor_schema.validate_tensor_schema_against_tensors( recipe.tensor_schema, canonical_tensors, ) @@ -3930,9 +3939,9 @@ def local_ready_materialization_identity( ) def local_ready_requires_binding_finalize(self, recipe: Any) -> bool: - serving_facts = getattr(recipe, "serving_facts", None) + runtime_facts = getattr(recipe, "runtime_facts", None) process_after_load_class = tc_readiness.coerce_finalize_class( - getattr(serving_facts, "process_after_load_class", None), + getattr(runtime_facts, 
"process_after_load_class", None), default=FinalizeClass.RUNTIME_ONLY, ) return process_after_load_class == FinalizeClass.REPRESENTATION_CHANGING @@ -3943,7 +3952,9 @@ def validate_local_ready_tensor_schema( recipe: Any, tensors: Mapping[str, Any], ) -> None: - validate_tensor_schema_against_tensors(recipe.tensor_schema, tensors) + tc_tensor_schema.validate_tensor_schema_against_tensors( + recipe.tensor_schema, tensors + ) def freeze_local_ready( self, @@ -3952,7 +3963,7 @@ def freeze_local_ready( update_epoch: Any, source_artifact_ref: str, ) -> Any: - return freeze_local_ready_binding( + return tc_local_ready.freeze_local_ready_binding( binding=binding, update_epoch=update_epoch, source_artifact_ref=source_artifact_ref, @@ -3998,93 +4009,24 @@ def build_materialization_options( def build_recipe_session( self, request: RecipeBuildSessionRequest ) -> RecipeBuildSession: - identity = request.identity - if identity is None: - identity = self._recipe_build_identity(request) - return RecipeBuildSession(identity) - - def _recipe_build_identity( - self, - request: RecipeBuildSessionRequest, - ) -> ServingBindingPlan: - model_config = request.model_config - if model_config is None: - self._lifecycle_not_implemented("build_recipe_session", "P2") - adapter = self._recipe_framework_adapter(model_config) + adapter = None placement = request.placement - if placement is None and self.host is not None: - placement = self._framework_context( - request.framework_config, - model_config, - ).placement - serving_placement = getattr(placement, "serving_placement", placement) - member = getattr(serving_placement, "member", None) - stable_identity_payload = getattr( - serving_placement, "stable_identity_payload", None - ) - if callable(stable_identity_payload): - placement_payload = stable_identity_payload() - else: - placement_payload = getattr(placement, "identity_payload", None) - if placement_payload is None: - placement_payload = getattr(serving_placement, "identity_payload", None) 
- trace_cache_schema_version = request.trace_cache_schema_version - if trace_cache_schema_version is None: - trace_cache_schema_version = getattr( - request.cache_config, - "trace_cache_schema_version", - 1, - ) - tp_rank = request.tp_rank - if tp_rank is None: - tp_rank = getattr(placement, "tp_rank", None) - if tp_rank is None and member is not None: - tp_rank = getattr(member, "member_index", None) - tp_world_size = request.tp_world_size - if tp_world_size is None: - tp_world_size = getattr(placement, "tp_world_size", None) - if tp_world_size is None and member is not None: - tp_world_size = getattr(member, "member_count", None) - compute_hash = getattr(model_config, "compute_hash", None) - model_id = str(getattr(model_config, "model", "unknown")) - framework_version = self._adapter_text(adapter, "framework_version") - return ServingBindingPlan( - model_hash=str( - compute_hash() - if callable(compute_hash) - else getattr(model_config, "model", "unknown") - ), - model_id=model_id, - model_revision=getattr(model_config, "revision", None), - dtype=str(getattr(model_config, "dtype", "unknown")), - runtime_version=framework_version, - framework_name=self._adapter_text(adapter, "framework_name"), - framework_version=framework_version, - adapter_version=self._adapter_text(adapter, "adapter_version"), - serving_abi_version=self._adapter_text( - adapter, - "serving_abi_version", - model_config, - ), - trace_cache_schema_version=int(trace_cache_schema_version), - tp_rank=int(tp_rank or 0), - tp_world_size=int(tp_world_size or 1), - topology_ref=getattr(serving_placement, "topology", None), - member_ref=member, - placement=placement_payload, + if request.identity is None: + model_config = request.model_config + if model_config is None: + self._lifecycle_not_implemented("build_recipe_session", "P2") + adapter = self._recipe_framework_adapter(model_config) + if placement is None and self.host is not None: + placement = self._framework_context( + request.framework_config, + 
model_config, + ).placement + return build_recipe_session_from_request( + request, + adapter=adapter, + placement=placement, ) - @staticmethod - def _adapter_text( - adapter: Any | None, - method_name: str, - *args: Any, - ) -> str: - method = getattr(adapter, method_name, None) - if callable(method): - return str(method(*args)) - return "" - def resolve_source_subject( self, path: str | SourceSelector, @@ -4155,7 +4097,7 @@ def _framework_host(self) -> FrameworkHost: if self.host is not None: return self.host.framework raise _capability_missing( - "ServingIntegration requires IntegrationHost.framework", + "ArtifactRuntimeIntegration requires IntegrationHost.framework", level="level1-runtime", capability="framework", operation="framework_host", @@ -4165,7 +4107,7 @@ def _framework_host(self) -> FrameworkHost: "assert_model_ready_for_runtime_binding", ), next_action=( - "Construct ServingRuntimeSession with IntegrationHost.framework." + "Construct ArtifactRuntimeSession with IntegrationHost.framework." ), ) @@ -4206,7 +4148,7 @@ def _surface(self) -> TensorSurfaceHost: ) return self.host.tensor_surface raise _capability_missing( - "ServingIntegration requires IntegrationHost.tensor_surface", + "ArtifactRuntimeIntegration requires IntegrationHost.tensor_surface", level="level1-runtime", capability="tensor_surface", operation="runtime_tensor_surface", @@ -4216,15 +4158,15 @@ def _surface(self) -> TensorSurfaceHost: "compute_runtime_tensor_schema_hash", ), next_action=( - "Construct ServingRuntimeSession with IntegrationHost.tensor_surface." + "Construct ArtifactRuntimeSession with IntegrationHost.tensor_surface." 
), ) @staticmethod def _require_target_device(target_device: Any | None) -> torch.device: if target_device is None: - raise ServingIntegrationError( - "ServingIntegration request requires target_device" + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration request requires target_device" ) return torch.device(target_device) @@ -4247,7 +4189,7 @@ def _runtime_policy_with_placement( if digest is None: return policy if policy is None: - return ServingRuntimePolicy( + return RuntimeArtifactPolicy( require_manifest=True, expected_topology_admission_digest=digest, ) @@ -4279,11 +4221,11 @@ def _json_object_payload(value: Any, *, field_name: str) -> Any: payload = json.loads(str(value)) except Exception as exc: raise ManifestMismatchError( - f"TensorCast serving artifact {field_name} is invalid JSON" + f"TensorCast runtime artifact {field_name} is invalid JSON" ) from exc if not isinstance(payload, dict): raise ManifestMismatchError( - f"TensorCast serving artifact {field_name} must be a JSON object" + f"TensorCast runtime artifact {field_name} must be a JSON object" ) return payload @@ -4308,12 +4250,12 @@ def _validate_resolved_artifact_placement( if manifest_topology_digest is not None: if placement_topology_digest is None: raise ManifestMismatchError( - "TensorCast serving artifact topology admission digest " + "TensorCast runtime artifact topology admission digest " "requires current framework placement" ) if manifest_topology_digest != placement_topology_digest: raise ManifestMismatchError( - "TensorCast serving artifact topology admission digest " + "TensorCast runtime artifact topology admission digest " "mismatch: " f"manifest={manifest_topology_digest}, " f"current={placement_topology_digest}" @@ -4326,7 +4268,7 @@ def _validate_resolved_artifact_placement( return if placement is None: raise ManifestMismatchError( - "TensorCast serving artifact logical topology requires current " + "TensorCast runtime artifact logical topology requires current " 
"framework placement" ) try: @@ -4336,7 +4278,7 @@ def _validate_resolved_artifact_placement( ) except Exception as exc: raise ManifestMismatchError( - "TensorCast serving artifact logical topology could not be " + "TensorCast runtime artifact logical topology could not be " "computed from current framework placement" ) from exc if cls._json_object_payload( @@ -4345,7 +4287,7 @@ def _validate_resolved_artifact_placement( current_logical_topology, field_name="current logical topology" ): raise ManifestMismatchError( - "TensorCast serving artifact logical topology mismatch" + "TensorCast runtime artifact logical topology mismatch" ) def _prepare_model_construction( @@ -4407,15 +4349,15 @@ def support_level( self, model: object, model_config: object, - ) -> ServingSupportLevel: + ) -> RuntimeSupportLevel: host = self._framework_host() support_level = getattr(host, "support_level", None) if callable(support_level): - return tc_readiness.coerce_serving_support_level( + return tc_readiness.coerce_runtime_support_level( support_level(model, model_config), - default=ServingSupportLevel.BLOCKED, + default=RuntimeSupportLevel.BLOCKED, ) - return ServingSupportLevel.BLOCKED + return RuntimeSupportLevel.BLOCKED def process_after_load_class( self, @@ -4465,7 +4407,7 @@ def trace_model_load( trace = getattr(host, "trace_model_load", None) if not callable(trace): raise _capability_missing( - "ServingIntegration host requires RecipeTraceHost." + "ArtifactRuntimeIntegration host requires RecipeTraceHost." 
"trace_model_load on recipe cache miss", level="level2-local-bootstrap", capability="recipe_trace", @@ -4514,7 +4456,7 @@ def native_load_weights(self, model: object, weights: object) -> None: native_load = getattr(host, "native_load_weights", None) if not callable(native_load): raise _capability_missing( - "ServingIntegration host requires NativeLoadHost for native " + "ArtifactRuntimeIntegration host requires NativeLoadHost for native " "checkpoint/source loading", level="level2-local-bootstrap", capability="native_load", @@ -4560,13 +4502,13 @@ def _assert_tensor_names_match_expected( if not missing and not unexpected: return raise SchemaMismatchError( - "TensorCast runtime tensor set does not match serving artifact: " + "TensorCast runtime tensor set does not match runtime artifact: " f"missing_count={len(missing)}, unexpected_count={len(unexpected)}" ) def _load_materialization_options( self, - request: _DirectServingLoad, + request: _DirectRuntimeLoad, resolved: Any, ) -> Any | None: if request.materialization is not None: @@ -4579,8 +4521,8 @@ def _load_materialization_options( or execution_facts is None ): if request.require_materialization_options: - raise ServingIntegrationError( - "ServingIntegration._load_existing_serving_artifact requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._load_existing_runtime_artifact requires " "materialization execution context for direct bind" ) return None @@ -4589,8 +4531,8 @@ def _load_materialization_options( "source_bound_contract_ready", False, ): - raise ServingIntegrationError( - "ServingIntegration._load_existing_serving_artifact requires ready " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._load_existing_runtime_artifact requires ready " "source-bound contract state for direct bind" ) manifest = getattr(resolved, "manifest", None) @@ -4607,7 +4549,7 @@ def _load_materialization_options( def _reload_materialization_options( self, - request: _ServingReload, + 
request: _RuntimeReload, resolved: Any, ) -> Any | None: if request.materialization is not None: @@ -4620,8 +4562,8 @@ def _reload_materialization_options( or execution_facts is None ): if request.require_materialization_options: - raise ServingIntegrationError( - "ServingIntegration._reload_existing_serving_artifact requires " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires " "materialization execution context for swap" ) return None @@ -4630,8 +4572,8 @@ def _reload_materialization_options( "source_bound_contract_ready", False, ): - raise ServingIntegrationError( - "ServingIntegration._reload_existing_serving_artifact requires ready " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration._reload_existing_runtime_artifact requires ready " "source-bound contract state for swap" ) manifest = getattr(resolved, "manifest", None) @@ -4656,19 +4598,19 @@ def _reload_materialization_options( def _resolved_artifact( self, *, - resolved_artifact: ResolvedServingArtifact | None, + resolved_artifact: ResolvedRuntimeArtifact | None, artifact_ref: str | None, artifact_locator: Any | None, expected_tensor_schema_hash: str | None, - serving_runtime_policy: Any | None, - placement: ServingPlacement | None = None, - ) -> ResolvedServingArtifact: + runtime_artifact_policy: Any | None, + placement: RuntimePlacement | None = None, + ) -> ResolvedRuntimeArtifact: if resolved_artifact is not None: if artifact_ref is not None and str(resolved_artifact.artifact_ref) != str( artifact_ref ): raise ManifestMismatchError( - "TensorCast resolved serving artifact ref mismatch: " + "TensorCast resolved runtime artifact ref mismatch: " f"resolved={resolved_artifact.artifact_ref}, " f"requested={artifact_ref}" ) @@ -4677,11 +4619,11 @@ def _resolved_artifact( placement=placement, ) if self.resolver is not None and expected_tensor_schema_hash: - return cross_check_serving_artifact( + return 
cross_check_runtime_artifact( resolved_artifact, resolver=self.resolver, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) return resolved_artifact resolved_ref = artifact_ref @@ -4697,15 +4639,15 @@ def _resolved_artifact( else: resolved_ref = str(artifact_locator) if not resolved_ref: - raise ServingIntegrationError( - "ServingIntegration request requires resolved_artifact, " + raise ArtifactRuntimeIntegrationError( + "ArtifactRuntimeIntegration request requires resolved_artifact, " "artifact_ref, or artifact_locator" ) - resolved = resolve_serving_artifact( + resolved = resolve_runtime_artifact( str(resolved_ref), resolver=self.resolver, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) self._validate_resolved_artifact_placement( resolved, @@ -4713,26 +4655,26 @@ def _resolved_artifact( ) return resolved - def _preflight_serving_artifact( + def _preflight_runtime_artifact( self, *, - resolved_artifact: ResolvedServingArtifact | None, + resolved_artifact: ResolvedRuntimeArtifact | None, artifact_ref: str | None, artifact_locator: Any | None, expected_tensor_schema_hash: str | None, policy: Any | None, - placement: ServingPlacement | None = None, - ) -> _ServingArtifactPreflight: + placement: RuntimePlacement | None = None, + ) -> _RuntimeArtifactPreflight: base_policy = self._runtime_policy(policy) resolved = self._resolved_artifact( resolved_artifact=resolved_artifact, artifact_ref=artifact_ref, artifact_locator=artifact_locator, expected_tensor_schema_hash=None, - serving_runtime_policy=None, + runtime_artifact_policy=None, placement=placement, ) - serving_runtime_policy = self._runtime_policy_from_manifest( + runtime_artifact_policy = self._runtime_policy_from_manifest( base_policy, resolved, placement=placement, @@ -4743,12 +4685,12 @@ def 
_preflight_serving_artifact( artifact_ref=artifact_ref, artifact_locator=artifact_locator, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, placement=placement, ) - return _ServingArtifactPreflight( + return _RuntimeArtifactPreflight( resolved_artifact=resolved, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) def _framework_context( @@ -4760,7 +4702,7 @@ def _framework_context( placement = None if self.host is not None: try: - placement = self._host_serving_placement(framework_config) + placement = self._host_runtime_placement(framework_config) except Exception: placement = None return FrameworkIntegrationContext( @@ -4774,13 +4716,13 @@ def _framework_context( def _materializer(self) -> RuntimeBindingMaterialization: if self.host is None: raise _capability_missing( - "ServingIntegration runtime materialization requires IntegrationHost", + "ArtifactRuntimeIntegration runtime materialization requires IntegrationHost", level="level1-runtime", capability="integration_host", operation="runtime_materialization", required_methods=("framework", "placement", "tensor_surface"), next_action=( - "Construct ServingRuntimeSession with an IntegrationHost " + "Construct ArtifactRuntimeSession with an IntegrationHost " "instead of calling lifecycle helpers without host facts." 
), ) @@ -4791,14 +4733,14 @@ def _materializer(self) -> RuntimeBindingMaterialization: @staticmethod def _state_seed( - resolved: ResolvedServingArtifact, + resolved: ResolvedRuntimeArtifact, *, tensor_schema_hash: str, execution_diagnostics: Any | None, materialization_diagnostics: Any | None = None, binding_handle: Any | None = None, artifact_realization_report: ArtifactRealizationReport | None = None, - readiness: str = "serving", + readiness: str = "runtime_ready", ) -> RuntimeStateSeed: artifact_ref = str(getattr(resolved, "artifact_ref", "") or "") manifest = getattr(resolved, "manifest", None) @@ -4837,23 +4779,23 @@ def _state_seed( ) -def resolve_serving_artifact( +def resolve_runtime_artifact( artifact_ref: str, *, - resolver: ServingArtifactResolver | None = None, + resolver: RuntimeArtifactResolver | None = None, manifest_tensor_name: str | None = None, schema_version: int | None = None, expected_tensor_schema_hash: str | None = None, - serving_runtime_policy: Any | None = None, -) -> ResolvedServingArtifact: - """Resolve a serving artifact and optionally cross-check runtime schema.""" + runtime_artifact_policy: Any | None = None, +) -> ResolvedRuntimeArtifact: + """Resolve a runtime artifact and optionally cross-check runtime schema.""" - resolved_resolver = resolver or ServingArtifactResolver( + resolved_resolver = resolver or RuntimeArtifactResolver( manifest_tensor_name=manifest_tensor_name or tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=( schema_version if schema_version is not None - else int(tc.ServingArtifactManifest.model_fields["schema_version"].default) + else int(tc.RuntimeArtifactManifest.model_fields["schema_version"].default) ), ) resolved = resolved_resolver.resolve(str(artifact_ref)) @@ -4861,65 +4803,65 @@ def resolve_serving_artifact( resolved_resolver.cross_check( resolved, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) 
return resolved -def read_serving_artifact_manifest( +def read_runtime_artifact_manifest( artifact: Any, *, artifact_ref: str, - resolver: ServingArtifactResolver, -) -> ResolvedServingArtifact: - """Read a serving manifest from an already opened artifact handle.""" + resolver: RuntimeArtifactResolver, +) -> ResolvedRuntimeArtifact: + """Read a runtime manifest from an already opened artifact handle.""" return resolver.read_manifest(artifact, artifact_ref=str(artifact_ref)) -def cross_check_serving_artifact( - resolved_artifact: ResolvedServingArtifact, +def cross_check_runtime_artifact( + resolved_artifact: ResolvedRuntimeArtifact, *, - resolver: ServingArtifactResolver, + resolver: RuntimeArtifactResolver, expected_tensor_schema_hash: str, - serving_runtime_policy: Any | None = None, -) -> ResolvedServingArtifact: + runtime_artifact_policy: Any | None = None, +) -> ResolvedRuntimeArtifact: """Validate manifest, descriptor schema, and runtime policy agreement.""" return resolver.cross_check( resolved_artifact, expected_tensor_schema_hash=expected_tensor_schema_hash, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, ) @dataclass(frozen=True) -class ServingRuntimeSession: - """Config-planned serving runtime lifecycle entrypoint.""" +class ArtifactRuntimeSession: + """Config-planned artifact runtime lifecycle entrypoint.""" - serving_config: ServingConfig + runtime_config: TensorCastRuntimeConfig host: IntegrationHost - integration: ServingIntegration + integration: ArtifactRuntimeIntegration profile_sink: Any | None = None @classmethod def from_config( cls, - serving_config: ServingConfig | Mapping[str, Any], + runtime_config: TensorCastRuntimeConfig | Mapping[str, Any], *, host: IntegrationHost, - resolver: ServingArtifactResolver | None = None, + resolver: RuntimeArtifactResolver | None = None, profile_sink: Any | None = None, - ) -> "ServingRuntimeSession": + ) -> "ArtifactRuntimeSession": config = ( - 
serving_config - if isinstance(serving_config, ServingConfig) - else ServingConfig.from_mapping(serving_config) + runtime_config + if isinstance(runtime_config, TensorCastRuntimeConfig) + else TensorCastRuntimeConfig.from_mapping(runtime_config) ) return cls( - serving_config=config, + runtime_config=config, host=host, - integration=ServingIntegration( + integration=ArtifactRuntimeIntegration( resolver=resolver, profile_sink=profile_sink, host=host, @@ -4943,7 +4885,10 @@ def publish_current_replica( del context return tc_replica_publication.publish_current_replica( current_attachment=current_attachment, - policy=self._replica_publication_policy(policy), + policy=tc_replica_publication.replica_publication_policy( + policy, + default_policy=self.runtime_config.replica_publication, + ), ensure_runtime_initialized=self._ensure_runtime_initialized, profile_sink=self.profile_sink, ) @@ -4986,7 +4931,7 @@ def retire_current_replica( reason=reason, drain_timeout_s=drain_timeout_s, default_drain_timeout_s=( - self.serving_config.replica_publication.drain_timeout_s + self.runtime_config.replica_publication.drain_timeout_s ), ensure_runtime_initialized=self._ensure_runtime_initialized, profile_sink=self.profile_sink, @@ -4994,10 +4939,10 @@ def retire_current_replica( def _start_intent( self, - intent: ServingIntent, + intent: RuntimeIntent, context: RequestContext, ) -> RuntimeAttachment: - """Private/admin entrypoint for already lowered serving intents.""" + """Private/admin entrypoint for already lowered runtime intents.""" self._ensure_runtime_initialized() return self.integration.start(intent, context) @@ -5006,20 +4951,20 @@ def reload( self, *, current_attachment: RuntimeAttachment | RuntimeBindingState | Any, - artifact_locator: ServingArtifactLocator, - policy: ServingPolicy | None, + artifact_locator: ArtifactLocator, + policy: RuntimePolicy | None, context: RequestContext, model: object | None = None, contract_identity: str | None = None, ) -> RuntimeAttachment: 
self._reject_local_reload_artifact_locator(artifact_locator) - if not isinstance(artifact_locator, ServingArtifactLocator): + if not isinstance(artifact_locator, ArtifactLocator): raise ConfigConflictError( - "TensorCast serving reload requires a ServingArtifactLocator" + "TensorCast runtime artifact reload requires an ArtifactLocator" ) - if policy is not None and not isinstance(policy, ServingPolicy): + if policy is not None and not isinstance(policy, RuntimePolicy): raise ConfigConflictError( - "TensorCast serving reload requires a ServingPolicy or None" + "TensorCast runtime artifact reload requires a RuntimePolicy or None" ) if isinstance(current_attachment, RuntimeAttachment): self._reject_reload_with_active_publication(current_attachment) @@ -5034,7 +4979,7 @@ def reload( ) return self.integration.reload( current_state, - ExistingServingArtifact(artifact_locator=artifact_locator, policy=policy), + ExistingRuntimeArtifact(artifact_locator=artifact_locator, policy=policy), context, model=runtime_model, contract_identity=contract_identity, @@ -5049,17 +4994,7 @@ def describe( return self.integration.describe(attachment_or_state) def _ensure_runtime_initialized(self) -> None: - self.serving_config.runtime.ensure_initialized() - - def _replica_publication_policy( - self, - policy: ReplicaPublicationPolicy | Mapping[str, Any] | None, - ) -> ReplicaPublicationPolicy: - if policy is None: - return self.serving_config.replica_publication - if isinstance(policy, ReplicaPublicationPolicy): - return policy - return ReplicaPublicationPolicy.model_validate(dict(policy)) + self.runtime_config.runtime.ensure_initialized() @staticmethod def _reject_reload_with_active_publication( @@ -5067,11 +5002,11 @@ def _reject_reload_with_active_publication( ) -> None: tc_replica_publication.reject_reload_with_active_publication(current_attachment) - def _plan_start_intent(self, context: RequestContext) -> ServingIntent: + def _plan_start_intent(self, context: RequestContext) -> 
RuntimeIntent: source_selector = self._source_selector_from_context(context) expected_member = None if ( - self.serving_config.retained_binding_acquire.mode == "external" + self.runtime_config.retained_binding_acquire.mode == "external" and self.host is not None ): placement = self.integration._framework_context( @@ -5081,28 +5016,28 @@ def _plan_start_intent(self, context: RequestContext) -> ServingIntent: if placement is not None: expected_member = placement.member try: - plan = tc_config.plan_serving_start( - config=self.serving_config, + plan = tc_runtime_config.plan_runtime_start( + config=self.runtime_config, source_selector=source_selector, expected_member=expected_member, ) - except tc_config.ServingStartPlanError as exc: + except tc_runtime_config.RuntimeStartPlanError as exc: raise ConfigConflictError(str(exc)) from exc - if isinstance(plan, tc_config.RetainedBindingAcquireStartPlan): + if isinstance(plan, tc_runtime_config.RuntimeRetainedRealizationStartPlan): return RetainedBindingAcquire(plan.authority) - if isinstance(plan, tc_config.ArtifactBindStartPlan): - return ExistingServingArtifact( + if isinstance(plan, tc_runtime_config.RuntimeArtifactBindStartPlan): + return ExistingRuntimeArtifact( artifact_locator=plan.artifact_locator, policy=plan.policy, ) - if isinstance(plan, tc_config.SourceBootstrapToBindingStartPlan): + if isinstance(plan, tc_runtime_config.RuntimeSourceBootstrapStartPlan): return LocalSourceBootstrap( source_selector=plan.source_selector, bootstrap_policy=plan.bootstrap_policy, ) raise ConfigConflictError( - f"TensorCast serving planner returned unsupported plan: {plan!r}" + f"TensorCast runtime planner returned unsupported plan: {plan!r}" ) def _source_selector_from_context( @@ -5130,46 +5065,46 @@ def _reject_local_reload_artifact_locator(artifact_locator: object) -> None: or _artifact_locator_kind(artifact_locator) == "local_path" ): raise ConfigConflictError( - "TensorCast serving reload requires a durable serving " + 
"TensorCast runtime artifact reload requires a durable runtime " "artifact locator, not a local source selector" ) -def bind_serving_artifact( +def bind_runtime_artifact( *, - resolved_artifact: ResolvedServingArtifact, + resolved_artifact: ResolvedRuntimeArtifact, tensor_names: Sequence[str], device: Any, - serving_runtime_policy: Any | None, + runtime_artifact_policy: Any | None, options: Any | None, ) -> RuntimeBindingResult: - """Bind a durable serving artifact and return an attach-ready result.""" + """Bind a durable runtime artifact and return an attach-ready result.""" - binding = tc_binding_runtime.bind_serving_artifact( + binding = tc_binding_runtime.bind_runtime_artifact( resolved_artifact=resolved_artifact, tensor_names=tuple(tensor_names), device=device, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, options=options, ) return RuntimeBindingResult.from_binding(binding) -def swap_serving_artifact( +def swap_runtime_artifact( *, binding: Any, - resolved_artifact: ResolvedServingArtifact, + resolved_artifact: ResolvedRuntimeArtifact, tensor_names: Sequence[str] | None = None, - serving_runtime_policy: Any | None, + runtime_artifact_policy: Any | None, options: Any | None, ) -> RuntimeBindingResult: - """Swap an existing runtime binding to another serving artifact.""" + """Swap an existing runtime binding to another runtime artifact.""" - operation_result = tc_binding_runtime.swap_serving_artifact( + operation_result = tc_binding_runtime.swap_runtime_artifact( binding=binding, resolved_artifact=resolved_artifact, tensor_names=tensor_names, - serving_runtime_policy=serving_runtime_policy, + runtime_artifact_policy=runtime_artifact_policy, options=options, ) result_binding = operation_result if operation_result is not None else binding @@ -5179,304 +5114,3 @@ def swap_serving_artifact( result_binding, operation_result=operation_result, ) - - -@contextmanager -def restore_retained_binding( - *, - authority: 
tc_retained_binding.ParsedRetainedServingBindingAuthority | None = None, - local_serving_ref: str | None = None, - target_device: torch.device | str, - expected_member: tc.ServingBindingMemberRef | None = None, - expected_tensor_schema_hash: str | None = None, - expected_serving_build_digest: str | None = None, - expected_target_layout_hash: str | None = None, - expected_daemon_id: str | None = None, - expected_daemon_session_id: str | None = None, - serving_artifact_id: str | None = None, - caller_pid: int | None = None, - runtime: Any | None = None, - client: Any | None = None, - restore_fn: Any | None = None, - timeout_s: float | None = None, -) -> Iterator[RestoredRetainedBinding]: - """Acquire and restore a retained binding value for framework attach. - - If the framework does not call ``transfer_to_runtime()``, the restored owner - is released automatically when the context exits. After transfer, close - ownership belongs to the returned runtime handle. - """ - - with tc_retained_binding.acquire_retained_serving_binding( - authority=authority, - local_serving_ref=local_serving_ref, - target_device=target_device, - expected_member=expected_member, - expected_tensor_schema_hash=expected_tensor_schema_hash, - expected_serving_build_digest=expected_serving_build_digest, - expected_target_layout_hash=expected_target_layout_hash, - expected_daemon_id=expected_daemon_id, - expected_daemon_session_id=expected_daemon_session_id, - serving_artifact_id=serving_artifact_id, - caller_pid=caller_pid if caller_pid is not None else os.getpid(), - runtime=runtime, - client=client, - timeout_s=timeout_s, - ) as lease: - attached = lease.restore( - target_device=torch.device(target_device), - restore_fn=restore_fn, - ) - restored = RestoredRetainedBinding(attached) - try: - yield restored - finally: - restored.close() - - -@contextmanager -def restore_prepared_local_ready_binding( - *, - resolved_artifact: ResolvedServingArtifact, - target_device: torch.device | str, - 
expected_member: tc.ServingBindingMemberRef, - expected_tensor_schema_hash: str, - expected_serving_build_digest: str | None = None, - caller_pid: int | None = None, - timeout_s: float | None = None, - runtime: Any | None = None, - client: Any | None = None, - restore_fn: Any | None = None, -) -> Iterator[RestoredRetainedBinding]: - """Restore a local-ready retained value referenced by a serving manifest.""" - - manifest = resolved_artifact.manifest - local_serving_ref = getattr(manifest, "local_serving_ref", None) - if manifest is None or not local_serving_ref: - raise RuntimeError( - "TensorCast prepared local-ready startup requires local_serving_ref " - "in the serving artifact manifest" - ) - serving_build_digest = ( - expected_serving_build_digest - if expected_serving_build_digest is not None - else getattr(manifest, "serving_build_digest", None) - ) - if not serving_build_digest: - raise RuntimeError( - "TensorCast prepared local-ready startup requires serving_build_digest" - ) - with restore_retained_binding( - local_serving_ref=str(local_serving_ref), - target_device=target_device, - expected_member=expected_member, - expected_tensor_schema_hash=expected_tensor_schema_hash, - expected_serving_build_digest=str(serving_build_digest), - serving_artifact_id=str(resolved_artifact.artifact_ref), - caller_pid=caller_pid, - timeout_s=timeout_s, - runtime=runtime, - client=client, - restore_fn=restore_fn, - ) as restored: - yield restored - - -def evaluate_semantic_validation_spec(spec: Any, actual_payload: Any) -> Any: - return tc_semantic_validation.evaluate_semantic_validation_spec( - spec, actual_payload - ) - - -def validate_tensor_schema_against_tensors( - tensor_schema: Any, - tensors: Mapping[str, torch.Tensor], -) -> None: - tc_tensor_schema.validate_tensor_schema_against_tensors(tensor_schema, tensors) - - -def collect_runtime_tensor_schema( - tensors: Mapping[str, torch.Tensor], - *, - remove_duplicate: bool, -) -> Any: - return 
tc_contract.collect_runtime_tensor_schema( - tensors, - remove_duplicate=remove_duplicate, - ) - - -def compute_runtime_tensor_schema_hash(schema: Any) -> str: - return tc_contract.compute_runtime_tensor_schema_hash(schema) - - -def compute_runtime_representation_contract_hash(**kwargs: Any) -> str: - return tc_contract.compute_runtime_representation_contract_hash(**kwargs) - - -def compute_serving_tensor_schema_hash(*args: Any, **kwargs: Any) -> str: - return tc.compute_serving_tensor_schema_hash(*args, **kwargs) - - -def canonical_index_from_recipe(recipe: Any) -> Any: - return tc_local_ready.canonical_index_from_recipe(recipe) - - -def materialized_tensor_schema(recipe: Any) -> Any: - return tc_local_ready.materialized_tensor_schema(recipe) - - -def prepare_same_binding_manifest_carrier(*args: Any, **kwargs: Any) -> Any: - return tc_local_ready.prepare_same_binding_manifest_carrier(*args, **kwargs) - - -def compute_serving_binding_tensor_schema_hash(*args: Any, **kwargs: Any) -> str: - return tc_local_ready.compute_serving_binding_tensor_schema_hash(*args, **kwargs) - - -def prepare_local_ready_serving(*args: Any, **kwargs: Any) -> Any: - return tc_local_ready.prepare_local_ready_serving(*args, **kwargs) - - -def freeze_local_ready_binding(*args: Any, **kwargs: Any) -> Any: - return tc_local_ready.freeze_local_ready_binding(*args, **kwargs) - - -def tensorcast_view_slice_count(recipe: Any) -> int: - return tc_local_ready.tensorcast_view_slice_count(recipe) - - -def compiled_recipe_realization_plan_count(recipe: Any) -> int: - return tc_local_ready.compiled_recipe_realization_plan_count(recipe) - - -def binding_value_verification_state_name(value: Any) -> str: - return tc_local_ready.binding_value_verification_state_name(value) - - -def logical_topology_json_from_recipe(*args: Any, **kwargs: Any) -> Any: - return tc_local_ready.logical_topology_json_from_recipe(*args, **kwargs) - - -def publication_context_from_recipe(*args: Any, **kwargs: Any) -> Any: - return 
tc_local_ready.publication_context_from_recipe(*args, **kwargs) - - -def resolve_source_artifact_ref(*args: Any, **kwargs: Any) -> Any: - return tc_source_catalog.resolve_source_artifact_ref(*args, **kwargs) - - -def source_catalog_from_selected_safetensors(*args: Any, **kwargs: Any) -> Any: - return tc_source_catalog.source_catalog_from_selected_safetensors(*args, **kwargs) - - -def compute_trace_build_cache_key(*args: Any, **kwargs: Any) -> str: - return tc_recipe_build.compute_trace_cache_key(*args, **kwargs) - - -def compute_recipe_build_cache_key(*args: Any, **kwargs: Any) -> str: - return tc_recipe_build.compute_recipe_cache_key(*args, **kwargs) - - -def trace_build_cache_path(*args: Any, **kwargs: Any) -> str: - return tc_recipe_build.trace_cache_path(*args, **kwargs) - - -def recipe_build_cache_path(*args: Any, **kwargs: Any) -> str: - return tc_recipe_build.recipe_cache_path(*args, **kwargs) - - -def stable_recipe_build_hash(*args: Any, **kwargs: Any) -> str: - return tc_recipe_build.stable_recipe_build_hash(*args, **kwargs) - - -def load_trace_plan_cache(*args: Any, **kwargs: Any) -> Any: - return tc_trace_cache.load_trace_plan_cache(*args, **kwargs) - - -def write_trace_plan_cache(*args: Any, **kwargs: Any) -> None: - tc_trace_cache.write_trace_plan_cache(*args, **kwargs) - - -def dump_trace_plan_debug(*args: Any, **kwargs: Any) -> None: - tc_trace_cache.dump_trace_plan_debug(*args, **kwargs) - - -def load_compiled_recipe_cache(*args: Any, **kwargs: Any) -> Any: - return tc_recipe_cache.load_compiled_recipe_cache(*args, **kwargs) - - -def write_compiled_recipe_cache(*args: Any, **kwargs: Any) -> None: - tc_recipe_cache.write_compiled_recipe_cache(*args, **kwargs) - - -def compute_recipe_compile_key(*args: Any, **kwargs: Any) -> str: - return tc_compiler.compute_recipe_compile_key(*args, **kwargs) - - -def compute_recipe_compile_key_from_inputs(*args: Any, **kwargs: Any) -> str: - return tc_compiler.compute_recipe_compile_key(*args, **kwargs) - - -def 
compile_recipe_from_inputs(*args: Any, **kwargs: Any) -> Any: - return tc_compiler.compile_serving_recipe(*args, **kwargs) - - -def allocate_tensors_from_schema(*args: Any, **kwargs: Any) -> Any: - return tc_materialization.allocate_tensors_from_schema(*args, **kwargs) - - -def apply_copy_plan(*args: Any, **kwargs: Any) -> Any: - return tc_materialization.apply_copy_plan(*args, **kwargs) - - -def tensorcast_view_slices_from_trace_plan(*args: Any, **kwargs: Any) -> Any: - return tc_materialization.tensorcast_view_slices_from_trace_plan(*args, **kwargs) - - -def validate_dst_coverage(*args: Any, **kwargs: Any) -> None: - tc_materialization.validate_dst_coverage(*args, **kwargs) - - -def validate_source_tensor_names(*args: Any, **kwargs: Any) -> None: - tc_materialization.validate_source_tensor_names(*args, **kwargs) - - -def validate_recipe_for_builder_mode(*args: Any, **kwargs: Any) -> None: - tc_recipe_validation.validate_recipe_for_builder_mode(*args, **kwargs) - - -def build_pure_transform_build_intent(*args: Any, **kwargs: Any) -> Any: - return tc_publication.build_pure_transform_build_intent(*args, **kwargs) - - -def complete_pure_transform_publication(*args: Any, **kwargs: Any) -> Any: - return tc.complete_pure_transform_publication(*args, **kwargs) - - -def build_materialization_execution_context(*args: Any, **kwargs: Any) -> Any: - return tc_binding_runtime.build_materialization_execution_context(*args, **kwargs) - - -def retained_binding_acquire_mode(*args: Any, **kwargs: Any) -> str: - return tc_retained_binding.retained_binding_acquire_mode(*args, **kwargs) - - -def retained_serving_binding_trusted_reservation_bytes( - *args: Any, **kwargs: Any -) -> int: - return tc_retained_binding.retained_serving_binding_trusted_reservation_bytes( - *args, **kwargs - ) - - -def retained_serving_binding_extra_from_prefetched_binding( - *args: Any, **kwargs: Any -) -> Any: - return tc_retained_binding.retained_serving_binding_extra_from_prefetched_binding( - *args, 
**kwargs - ) - - -def parse_retained_serving_binding_authority(*args: Any, **kwargs: Any) -> Any: - return tc_retained_binding.parse_retained_serving_binding_authority(*args, **kwargs) diff --git a/tensorcast/artifact_runtime/locator.py b/tensorcast/artifact_runtime/locator.py new file mode 100644 index 00000000..5e0c04c4 --- /dev/null +++ b/tensorcast/artifact_runtime/locator.py @@ -0,0 +1,149 @@ +# Copyright (c) 2026, TensorCast Team. + +"""Artifact runtime locator schema and resolution helpers.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any +from urllib.parse import quote + +from pydantic import BaseModel, ConfigDict, field_validator + +_ARTIFACT_LOCATOR_KINDS = {"version_key", "artifact_ref", "ranked_version_key"} +ARTIFACT_LOCATOR_SCHEMA_VERSION = 1 +RANKED_VERSION_KEY_MEMBER_SEGMENT = "members" + + +def _normalize_optional_text(value: Any) -> str | None: + if value is None: + return None + normalized = str(value).strip() + return normalized or None + + +def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: + normalized = str(value).strip().lower() + if normalized not in allowed: + raise ValueError( + f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" + ) + return normalized + + +def _member_id_from_ref(member: Any) -> str: + if member is None: + raise ValueError( + "ranked_version_key artifact locator resolution requires a member" + ) + if isinstance(member, Mapping): + member_id = member.get("member_id") + else: + member_id = getattr(member, "member_id", None) + normalized = _normalize_optional_text(member_id) + if normalized is None: + raise ValueError( + "ranked_version_key artifact locator resolution requires member.member_id" + ) + return normalized + + +def _member_from_placement(placement: Any | None) -> Any | None: + if placement is None: + return None + if isinstance(placement, Mapping): + return placement.get("member") + return getattr(placement, 
"member", None) + + +def ranked_version_key_for_member(version_key: str, member: Any) -> str: + base_key = _normalize_optional_text(version_key) + if base_key is None: + raise ValueError("ranked_version_key base value is required") + member_id = quote(_member_id_from_ref(member), safe=":._-") + return f"{base_key.rstrip('/')}/{RANKED_VERSION_KEY_MEMBER_SEGMENT}/{member_id}" + + +class ArtifactLocator(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + kind: str + value: str + schema_version: int = ARTIFACT_LOCATOR_SCHEMA_VERSION + + @field_validator("kind", mode="before") + @classmethod + def _normalize_kind(cls, value: Any) -> str: + return _normalize_enum( + value, + allowed=_ARTIFACT_LOCATOR_KINDS, + field_name="artifact_locator.kind", + ) + + @field_validator("value", mode="before") + @classmethod + def _normalize_value(cls, value: Any) -> str: + normalized = _normalize_optional_text(value) + if normalized is None: + raise ValueError("artifact_locator.value is required") + return normalized + + @classmethod + def artifact_ref(cls, artifact_ref: str) -> ArtifactLocator: + return cls(kind="artifact_ref", value=str(artifact_ref)) + + @classmethod + def version_key(cls, version_key: str) -> ArtifactLocator: + return cls(kind="version_key", value=str(version_key)) + + @classmethod + def ranked_version_key(cls, version_key: str) -> ArtifactLocator: + return cls(kind="ranked_version_key", value=str(version_key)) + + def resolve_version_key( + self, + *, + member: Any | None = None, + placement: Any | None = None, + ) -> str: + if self.kind == "artifact_ref": + return self.value + if self.kind == "ranked_version_key": + if member is None: + member = _member_from_placement(placement) + return ranked_version_key_for_member(self.value, member) + return self.value + + def resolve_artifact_ref( + self, + *, + member: Any | None = None, + placement: Any | None = None, + ) -> str: + if self.kind == "artifact_ref": + return self.value + + from 
tensorcast.api.store import get_runtime_context + + resolved_mapping = get_runtime_context().resolve_key_mapping_cached( + key=self.resolve_version_key(member=member, placement=placement) + ) + artifact_id = ( + resolved_mapping[0] + if isinstance(resolved_mapping, tuple) + else getattr(resolved_mapping, "artifact_id", None) + ) + if not artifact_id: + raise ValueError( + "artifact locator version key did not resolve to an artifact: " + f"{self.value!r}" + ) + return artifact_id + + +__all__ = [ + "ARTIFACT_LOCATOR_SCHEMA_VERSION", + "ArtifactLocator", + "RANKED_VERSION_KEY_MEMBER_SEGMENT", + "ranked_version_key_for_member", +] diff --git a/tensorcast/artifact_runtime/policy.py b/tensorcast/artifact_runtime/policy.py new file mode 100644 index 00000000..afafc88b --- /dev/null +++ b/tensorcast/artifact_runtime/policy.py @@ -0,0 +1,161 @@ +# Copyright (c) 2026, TensorCast Team. + +"""Artifact runtime policy schema and reload request helpers.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from pydantic import BaseModel, ConfigDict, field_validator, model_validator + +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.types import RuntimeArtifactPolicy + +_POLICY_MODES = {"from_manifest", "pinned"} +RUNTIME_POLICY_SCHEMA_VERSION = 1 + + +def _normalize_optional_text(value: Any) -> str | None: + if value is None: + return None + normalized = str(value).strip() + return normalized or None + + +def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: + normalized = str(value).strip().lower() + if normalized not in allowed: + raise ValueError( + f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" + ) + return normalized + + +class RuntimePolicy(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + mode: str = "from_manifest" + manifest_ref: str | None = None + representation_contract_hash: str | None = None + serving_build_digest: 
str | None = None + schema_version: int = RUNTIME_POLICY_SCHEMA_VERSION + + @field_validator("mode", mode="before") + @classmethod + def _normalize_mode(cls, value: Any) -> str: + if value is None: + return "from_manifest" + return _normalize_enum( + value, + allowed=_POLICY_MODES, + field_name="runtime.policy.mode", + ) + + @field_validator( + "manifest_ref", + "representation_contract_hash", + "serving_build_digest", + mode="before", + ) + @classmethod + def _normalize_optional_fields(cls, value: Any) -> Any: + return _normalize_optional_text(value) + + @model_validator(mode="after") + def _validate_pinned_policy(self) -> RuntimePolicy: + if self.mode != "pinned": + return self + missing = [ + name + for name, value in ( + ("manifest_ref", self.manifest_ref), + ( + "representation_contract_hash", + self.representation_contract_hash, + ), + ("serving_build_digest", self.serving_build_digest), + ) + if value is None + ] + if missing: + raise ValueError( + f"runtime.policy.mode='pinned' requires {', '.join(missing)}" + ) + return self + + def to_runtime_policy(self) -> RuntimeArtifactPolicy | None: + if self.mode == "from_manifest": + return None + return RuntimeArtifactPolicy( + require_manifest=True, + serving_manifest_ref=self.manifest_ref, + expected_representation_contract_hash=(self.representation_contract_hash), + expected_serving_build_digest=self.serving_build_digest, + ) + + +def normalize_runtime_reload_request_payload( + *, + artifact_locator: ArtifactLocator | Mapping[str, Any], + policy: RuntimePolicy | Mapping[str, Any] | None = None, +) -> tuple[dict[str, Any], dict[str, Any]]: + """Normalize runtime reload locator/policy data to the stable wire shape.""" + + parsed_locator = ( + artifact_locator + if isinstance(artifact_locator, ArtifactLocator) + else ArtifactLocator.model_validate(artifact_locator) + ) + parsed_policy = ( + policy + if isinstance(policy, RuntimePolicy) + else RuntimePolicy.model_validate(policy or {"mode": "from_manifest"}) + ) + 
locator_payload = { + "kind": parsed_locator.kind, + "value": parsed_locator.value, + } + policy_payload: dict[str, Any] = {"mode": parsed_policy.mode} + if parsed_policy.manifest_ref is not None: + policy_payload["manifest_ref"] = parsed_policy.manifest_ref + if parsed_policy.representation_contract_hash is not None: + policy_payload["representation_contract_hash"] = ( + parsed_policy.representation_contract_hash + ) + if parsed_policy.serving_build_digest is not None: + policy_payload["serving_build_digest"] = parsed_policy.serving_build_digest + return locator_payload, policy_payload + + +def merge_runtime_reload_extra_config( + extra: Mapping[str, Any] | None, + *, + artifact_locator: ArtifactLocator | Mapping[str, Any], + policy: RuntimePolicy | Mapping[str, Any] | None = None, +) -> dict[str, Any]: + """Return model-loader config with a normalized runtime reload request.""" + + normalized_locator, normalized_policy = normalize_runtime_reload_request_payload( + artifact_locator=artifact_locator, + policy=policy, + ) + merged_extra = dict(extra or {}) + if "serving" in merged_extra: + raise ValueError( + "TensorCast runtime reload config section 'serving' was removed; " + "use 'runtime_artifact'" + ) + runtime_artifact = dict(merged_extra.get("runtime_artifact", {})) + runtime_artifact["artifact_locator"] = normalized_locator + runtime_artifact["policy"] = normalized_policy + merged_extra["runtime_artifact"] = runtime_artifact + return merged_extra + + +__all__ = [ + "RUNTIME_POLICY_SCHEMA_VERSION", + "RuntimePolicy", + "merge_runtime_reload_extra_config", + "normalize_runtime_reload_request_payload", +] diff --git a/tensorcast/artifact_runtime/publication/__init__.py b/tensorcast/artifact_runtime/publication/__init__.py new file mode 100644 index 00000000..0d654d7a --- /dev/null +++ b/tensorcast/artifact_runtime/publication/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TensorCast Team. 
+"""Runtime publication and replica lifecycle helpers.""" diff --git a/tensorcast/artifact_runtime/publication/actions.py b/tensorcast/artifact_runtime/publication/actions.py new file mode 100644 index 00000000..fa5e460c --- /dev/null +++ b/tensorcast/artifact_runtime/publication/actions.py @@ -0,0 +1,110 @@ +# Copyright (c) 2026, TensorCast Team. +"""Artifact runtime replica publication actions. + +These helpers are the public artifact-runtime actions for publishing or +retiring the replica represented by a realized runtime attachment. The current +implementation delegates to the serving runtime binding implementation while +keeping callers away from ``ArtifactRuntimeSession``. +""" + +from __future__ import annotations + +from collections.abc import Callable, Mapping +from dataclasses import dataclass +from typing import Any + +from tensorcast.artifact_runtime.attachment import RuntimeAttachment +from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig +from tensorcast.artifact_runtime.publication import replica as replica_publication + + +@dataclass(frozen=True) +class RuntimeReplicaPublicationSettings: + """Runtime replica publication settings parsed from loader configuration.""" + + policy: object + ensure_runtime_initialized: Callable[[], None] + + @property + def drain_timeout_s(self) -> float: + return float(getattr(self.policy, "drain_timeout_s", 30.0)) + + +def runtime_replica_publication_settings( + config: TensorCastRuntimeConfig | Mapping[str, Any] | None = None, +) -> RuntimeReplicaPublicationSettings: + """Parse publication settings from runtime loader configuration.""" + + parsed = ( + config + if isinstance(config, TensorCastRuntimeConfig) + else TensorCastRuntimeConfig.from_mapping(config or {}) + ) + return RuntimeReplicaPublicationSettings( + policy=parsed.replica_publication, + ensure_runtime_initialized=parsed.runtime.ensure_initialized, + ) + + +def publish_runtime_replica( + *, + current_attachment: RuntimeAttachment, + policy: 
object, + ensure_runtime_initialized: Callable[[], None], + profile_sink: Callable[[Mapping[str, object]], object] | None = None, +) -> RuntimeAttachment: + """Publish the current artifact-backed runtime attachment as a replica.""" + + return replica_publication.publish_current_replica( + current_attachment=current_attachment, + policy=policy, + ensure_runtime_initialized=ensure_runtime_initialized, + profile_sink=profile_sink, + ) + + +def project_runtime_replica_publication_state( + *, + current_attachment: RuntimeAttachment, + state: str, + reason: str | None = None, + operation_id: str | None = None, +) -> RuntimeAttachment: + """Return an attachment with an observational publication projection.""" + + return replica_publication.project_current_replica_publication_state( + current_attachment=current_attachment, + state=state, + reason=reason, + operation_id=operation_id, + ) + + +def retire_runtime_replica( + *, + current_attachment: RuntimeAttachment, + reason: str = "retire", + drain_timeout_s: float | None = None, + default_drain_timeout_s: float | None = None, + ensure_runtime_initialized: Callable[[], None], + profile_sink: Callable[[Mapping[str, object]], object] | None = None, +) -> RuntimeAttachment: + """Retire the published replica tied to a runtime attachment.""" + + return replica_publication.retire_current_replica( + current_attachment=current_attachment, + reason=reason, + drain_timeout_s=drain_timeout_s, + default_drain_timeout_s=default_drain_timeout_s, + ensure_runtime_initialized=ensure_runtime_initialized, + profile_sink=profile_sink, + ) + + +__all__ = [ + "RuntimeReplicaPublicationSettings", + "project_runtime_replica_publication_state", + "publish_runtime_replica", + "retire_runtime_replica", + "runtime_replica_publication_settings", +] diff --git a/tensorcast/artifact_runtime/publication/context.py b/tensorcast/artifact_runtime/publication/context.py new file mode 100644 index 00000000..bef0ff2e --- /dev/null +++ 
b/tensorcast/artifact_runtime/publication/context.py @@ -0,0 +1,113 @@ +# Copyright (c) 2026, TensorCast Team. + +"""Artifact runtime publication context helpers for recipe-backed artifacts.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from tensorcast.artifact_runtime.contract import logical_topology_json +from tensorcast.types import BuilderMode, RuntimeArtifactBuildIntent, RuntimeTopologyRef + + +@dataclass(frozen=True) +class RecipePublicationContext: + source_artifact_ref: str + framework_name: str + adapter_version: str + serving_abi_version: str + logical_topology_json: str | None = None + + +def logical_topology_json_from_recipe( + recipe: Any, + *, + topology: RuntimeTopologyRef | None = None, + framework_payload: dict[str, Any] | None = None, +) -> str | None: + if topology is None: + if ( + getattr(recipe, "topology_ref", None) is None + and getattr(recipe, "member_ref", None) is None + ): + return None + raise ValueError( + "TensorCast publication manifest requires RuntimeTopologyRef for " + "a topology-sensitive recipe" + ) + return logical_topology_json( + topology, + framework_payload=framework_payload or {}, + ) + + +def publication_context_from_recipe( + recipe: Any, + *, + logical_topology_json_payload: str | None = None, +) -> RecipePublicationContext: + return RecipePublicationContext( + source_artifact_ref=recipe.source_artifact_ref, + framework_name=recipe.runtime_facts.framework_name, + adapter_version=recipe.runtime_facts.adapter_version, + serving_abi_version=recipe.runtime_facts.serving_abi_version, + logical_topology_json=logical_topology_json_payload, + ) + + +def build_recipe_runtime_build_intent( + context: RecipePublicationContext, + *, + builder_mode: BuilderMode, + build_pipeline_version: str, + representation_contract_hash: str | None = None, +) -> RuntimeArtifactBuildIntent: + return RuntimeArtifactBuildIntent( + representation_contract_hash=representation_contract_hash, + 
builder_mode=builder_mode, + framework_name=context.framework_name, + adapter_version=context.adapter_version, + serving_abi_version=context.serving_abi_version, + build_pipeline_version=str(build_pipeline_version), + source_artifact_ref=context.source_artifact_ref, + ) + + +def build_pure_transform_build_intent( + context: RecipePublicationContext, + *, + build_pipeline_version: str, + representation_contract_hash: str | None = None, +) -> RuntimeArtifactBuildIntent: + return build_recipe_runtime_build_intent( + context, + builder_mode=BuilderMode.PURE_TRANSFORM, + build_pipeline_version=build_pipeline_version, + representation_contract_hash=representation_contract_hash, + ) + + +def build_binding_finalize_build_intent( + context: RecipePublicationContext, + *, + build_pipeline_version: str, + representation_contract_hash: str, +) -> RuntimeArtifactBuildIntent: + return build_recipe_runtime_build_intent( + context, + builder_mode=BuilderMode.BINDING_FINALIZE, + build_pipeline_version=build_pipeline_version, + representation_contract_hash=str(representation_contract_hash), + ) + + +__all__ = [ + "RecipePublicationContext", + "build_binding_finalize_build_intent", + "build_pure_transform_build_intent", + "build_recipe_runtime_build_intent", + "logical_topology_json", + "logical_topology_json_from_recipe", + "publication_context_from_recipe", +] diff --git a/tensorcast/serving/replica_publication.py b/tensorcast/artifact_runtime/publication/replica.py similarity index 96% rename from tensorcast/serving/replica_publication.py rename to tensorcast/artifact_runtime/publication/replica.py index f4d2a1ef..7334d577 100644 --- a/tensorcast/serving/replica_publication.py +++ b/tensorcast/artifact_runtime/publication/replica.py @@ -25,12 +25,12 @@ release_contract_for, report_for_publication, ) -from tensorcast.serving.errors import ReplicaPublicationError -from tensorcast.serving.runtime_attachment import ( +from tensorcast.artifact_runtime.attachment import ( 
RuntimeAttachment, RuntimeBindingState, ) -from tensorcast.serving.runtime_view import ( +from tensorcast.artifact_runtime.errors import ReplicaPublicationError +from tensorcast.artifact_runtime.view import ( BindingValueRefProjection, PublishedReplicaProjection, ) @@ -306,7 +306,7 @@ def _attachment_with_published_replica( projection: PublishedReplicaProjection, ) -> RuntimeAttachment: binding = state_publication_binding(attachment.state) - spec = ArtifactRealizationSpec.publication(target=projection) + spec = ArtifactRealizationSpec._publication(target=projection) target_layout_digest = ( projection.binding_layout_id or attachment.view.endpoint.weight_version.binding_layout_id @@ -784,7 +784,7 @@ def reject_reload_with_active_publication( published = current_attachment.view.endpoint.weight_version.published_replica if published is not None and published.state in _ACTIVE_PUBLICATION_STATES: raise ReplicaPublicationError( - "TensorCast serving reload requires retiring the active published " + "TensorCast runtime reload requires retiring the active published " "replica before swap", operation="reload", details={ @@ -798,7 +798,7 @@ def reject_reload_with_active_publication( if not binding_has_active_published_replica(binding): return raise ReplicaPublicationError( - "TensorCast serving reload found an active published replica on the " + "TensorCast runtime reload found an active published replica on the " "runtime binding but no active attachment projection; retire the " "current replica before swap", operation="reload", @@ -810,12 +810,37 @@ def reject_reload_with_active_publication( ) +def _policy_validation_payload(policy: object) -> object: + if isinstance(policy, Mapping): + return dict(policy) + model_dump = getattr(policy, "model_dump", None) + if callable(model_dump): + return model_dump(mode="python") + return policy + + +def replica_publication_policy( + policy: object | None, + *, + default_policy: object, +) -> object: + if policy is None: + return 
default_policy + if isinstance(policy, type(default_policy)): + return policy + validate = getattr(type(default_policy), "model_validate", None) + if callable(validate): + return validate(_policy_validation_payload(policy)) + return policy + + __all__ = [ "binding_has_active_published_replica", "project_current_replica_publication_state", "publication_generation", "publish_current_replica", "reject_reload_with_active_publication", + "replica_publication_policy", "retire_current_replica", "state_publication_binding", ] diff --git a/tensorcast/serving/readiness.py b/tensorcast/artifact_runtime/readiness.py similarity index 71% rename from tensorcast/serving/readiness.py rename to tensorcast/artifact_runtime/readiness.py index 62c5c033..b2bb951e 100644 --- a/tensorcast/serving/readiness.py +++ b/tensorcast/artifact_runtime/readiness.py @@ -1,19 +1,19 @@ # Copyright (c) 2026, TensorCast Team. -"""Framework-neutral serving readiness and admission helpers.""" +"""Artifact runtime readiness and admission helpers.""" from __future__ import annotations from collections.abc import Callable from typing import Any -from tensorcast.types import BuilderMode, FinalizeClass, ServingSupportLevel +from tensorcast.types import BuilderMode, FinalizeClass, RuntimeSupportLevel -_SUPPORT_LEVEL_ORDER: dict[ServingSupportLevel, int] = { - ServingSupportLevel.BLOCKED: -1, - ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: 0, - ServingSupportLevel.BUILDER_PUBLICATION_READY: 1, - ServingSupportLevel.RUNTIME_BIND_SWAP_READY: 2, +_SUPPORT_LEVEL_ORDER: dict[RuntimeSupportLevel, int] = { + RuntimeSupportLevel.BLOCKED: -1, + RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: 0, + RuntimeSupportLevel.BUILDER_PUBLICATION_READY: 1, + RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY: 2, } @@ -29,32 +29,32 @@ def coerce_finalize_class( return FinalizeClass(str(value).strip()) -def coerce_serving_support_level( +def coerce_runtime_support_level( value: Any, *, - default: ServingSupportLevel = 
ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY, -) -> ServingSupportLevel: + default: RuntimeSupportLevel = RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY, +) -> RuntimeSupportLevel: if value is None: return default - if isinstance(value, ServingSupportLevel): + if isinstance(value, RuntimeSupportLevel): return value normalized = str(value).strip().lower() - return ServingSupportLevel(normalized) + return RuntimeSupportLevel(normalized) -def serving_support_level_at_least( - value: ServingSupportLevel | str, - minimum: ServingSupportLevel | str, +def runtime_support_level_at_least( + value: RuntimeSupportLevel | str, + minimum: RuntimeSupportLevel | str, ) -> bool: - resolved_value = coerce_serving_support_level(value) - resolved_minimum = coerce_serving_support_level(minimum) + resolved_value = coerce_runtime_support_level(value) + resolved_minimum = coerce_runtime_support_level(minimum) return ( _SUPPORT_LEVEL_ORDER[resolved_value] >= _SUPPORT_LEVEL_ORDER[resolved_minimum] ) -def serving_support_level_display_name(value: ServingSupportLevel | str) -> str: - return coerce_serving_support_level(value).value +def runtime_support_level_display_name(value: RuntimeSupportLevel | str) -> str: + return coerce_runtime_support_level(value).value def readiness_family(row: Any) -> str: @@ -75,8 +75,8 @@ def readiness_post_bind_finalize_class(row: Any) -> FinalizeClass: ) -def readiness_support_level(row: Any) -> ServingSupportLevel: - return coerce_serving_support_level(getattr(row, "support_level", None)) +def readiness_support_level(row: Any) -> RuntimeSupportLevel: + return coerce_runtime_support_level(getattr(row, "support_level", None)) def readiness_publication_modes(row: Any) -> tuple[str, ...]: @@ -94,9 +94,9 @@ def is_pure_transform_publication_allowlisted(row: Any) -> bool: pure_transform_candidate and readiness_process_after_load_class(row) == FinalizeClass.RUNTIME_ONLY and readiness_post_bind_finalize_class(row) == FinalizeClass.RUNTIME_ONLY - and 
serving_support_level_at_least( + and runtime_support_level_at_least( readiness_support_level(row), - ServingSupportLevel.BUILDER_PUBLICATION_READY, + RuntimeSupportLevel.BUILDER_PUBLICATION_READY, ) ) @@ -110,9 +110,9 @@ def is_binding_finalize_publication_allowlisted(row: Any) -> bool: ) return ( binding_finalize_candidate - and serving_support_level_at_least( + and runtime_support_level_at_least( readiness_support_level(row), - ServingSupportLevel.BUILDER_PUBLICATION_READY, + RuntimeSupportLevel.BUILDER_PUBLICATION_READY, ) and readiness_process_after_load_class(row) == FinalizeClass.REPRESENTATION_CHANGING @@ -121,22 +121,19 @@ def is_binding_finalize_publication_allowlisted(row: Any) -> bool: def is_runtime_bind_swap_allowlisted(row: Any) -> bool: - allowed = bool( - getattr(row, "runtime_bind_swap_allowed", False) - or getattr(row, "serving_only_runtime_allowed", False) - ) + allowed = bool(getattr(row, "runtime_bind_swap_allowed", False)) return ( allowed and readiness_post_bind_finalize_class(row) == FinalizeClass.RUNTIME_ONLY - and serving_support_level_at_least( + and runtime_support_level_at_least( readiness_support_level(row), - ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, ) ) class ReadinessInventoryAdmissionPolicy: - """AdmissionPolicy implementation backed by a framework readiness resolver.""" + """Admission policy backed by a framework readiness resolver.""" def __init__( self, @@ -148,14 +145,14 @@ def __init__( self._endpoint_fields = endpoint_fields def admit(self, request: Any) -> Any: - from tensorcast.serving.hosts import AdmissionDecision + from tensorcast.artifact_runtime.host import RuntimeAdmissionDecision row = self._resolve_readiness(request.model_config) missing_semantic_proofs = ( request.placement_admission.missing_framework_semantic_proofs() ) allowed = is_runtime_bind_swap_allowlisted(row) and not missing_semantic_proofs - support_level = 
serving_support_level_display_name(readiness_support_level(row)) + support_level = runtime_support_level_display_name(readiness_support_level(row)) if missing_semantic_proofs: support_level = ( f"{support_level}:placement_missing_semantic_proof:" @@ -170,7 +167,7 @@ def admit(self, request: Any) -> Any: endpoint_fields: dict[str, object] = {"family": family} if self._endpoint_fields is not None: endpoint_fields.update(self._endpoint_fields(row)) - return AdmissionDecision( + return RuntimeAdmissionDecision( family=family, support_level=support_level, startup_allowed=allowed, @@ -183,7 +180,7 @@ def admit(self, request: Any) -> Any: __all__ = [ "ReadinessInventoryAdmissionPolicy", "coerce_finalize_class", - "coerce_serving_support_level", + "coerce_runtime_support_level", "is_binding_finalize_publication_allowlisted", "is_pure_transform_publication_allowlisted", "is_runtime_bind_swap_allowlisted", @@ -192,6 +189,6 @@ def admit(self, request: Any) -> Any: "readiness_process_after_load_class", "readiness_publication_modes", "readiness_support_level", - "serving_support_level_at_least", - "serving_support_level_display_name", + "runtime_support_level_at_least", + "runtime_support_level_display_name", ] diff --git a/tensorcast/artifact_runtime/recipe/__init__.py b/tensorcast/artifact_runtime/recipe/__init__.py new file mode 100644 index 00000000..330d7af8 --- /dev/null +++ b/tensorcast/artifact_runtime/recipe/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2026, TensorCast Team. +"""Runtime recipe build, compilation, trace, and materialization helpers.""" diff --git a/tensorcast/serving/recipe_build.py b/tensorcast/artifact_runtime/recipe/build.py similarity index 79% rename from tensorcast/serving/recipe_build.py rename to tensorcast/artifact_runtime/recipe/build.py index 7420bdd1..5a0a3e7d 100644 --- a/tensorcast/serving/recipe_build.py +++ b/tensorcast/artifact_runtime/recipe/build.py @@ -1,5 +1,5 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Framework-neutral recipe build identity and cache helpers.""" +"""Framework-neutral runtime recipe build and cache helpers.""" from __future__ import annotations @@ -11,12 +11,12 @@ import threading import time from collections import OrderedDict -from collections.abc import Callable, Iterator, MutableMapping, Sequence +from collections.abc import Callable, Iterator, Mapping, MutableMapping, Sequence from dataclasses import dataclass from pathlib import Path -from typing import Any +from typing import Any, cast -from tensorcast.serving.binding_plan import ServingBindingPlan +from tensorcast.artifact_runtime.recipe.identity import RuntimeBindingPlan _LOGGER = logging.getLogger(__name__) @@ -40,7 +40,7 @@ def stable_recipe_build_hash(payload: dict[str, Any]) -> str: def compute_trace_cache_key( - identity: ServingBindingPlan, + identity: RuntimeBindingPlan, *, metadata_fingerprint: str, ) -> str: @@ -50,7 +50,7 @@ def compute_trace_cache_key( def compute_recipe_cache_key( - identity: ServingBindingPlan, + identity: RuntimeBindingPlan, *, metadata_fingerprint: str, ) -> str: @@ -84,6 +84,115 @@ class RecipeBuildCacheConfig: synchronous_recipe_cache_write: bool = False +def _optional_bool(fields: Mapping[str, object], name: str, default: bool) -> bool: + value = fields.get(name) + if value is None: + return default + return bool(value) + + +def _optional_path(value: object | None) -> Path | None: + if value is None: + return None + text = str(value).strip() + if not text: + return None + return Path(text).expanduser() + + +def _unique_paths(paths: Sequence[Path]) -> tuple[Path, ...]: + unique: list[Path] = [] + seen: set[str] = set() + for path in paths: + key = str(path) + if key in seen: + continue + seen.add(key) + unique.append(path) + return tuple(unique) + + +def _selected_file_parent_paths(source_catalog: object) -> tuple[str, ...]: + selected_files_value = getattr(source_catalog, "selected_files", None) + if selected_files_value is None: + return () + 
selected_files = cast(Sequence[object], selected_files_value) + parent_paths: list[str] = [] + for entry in selected_files: + path = getattr(entry, "path", None) + if path is None: + continue + parent_paths.append(str(Path(path).expanduser().resolve().parent)) + return tuple(parent_paths) + + +def _model_adjacent_cache_root(source_catalog: object) -> Path | None: + parent_paths = _selected_file_parent_paths(source_catalog) + if not parent_paths: + return None + return Path(os.path.commonpath(parent_paths)) / ".tensorcast" / "bootstrap_cache" + + +def _is_writable_or_creatable(path: Path) -> bool: + if path.exists(): + return os.access(path, os.W_OK) + parent = path.parent + while not parent.exists() and parent != parent.parent: + parent = parent.parent + return parent.exists() and os.access(parent, os.W_OK) + + +def recipe_build_cache_config_from_policy( + policy: object, + *, + source_catalog: object, +) -> RecipeBuildCacheConfig: + fields = dict(getattr(policy, "fields", None) or {}) + explicit_cache_root = _optional_bool(fields, "explicit_cache_root", False) + prefer_model_adjacent = _optional_bool(fields, "prefer_model_adjacent", True) + cache_root = _optional_path(fields.get("cache_root")) + + roots: list[Path] = [] + if prefer_model_adjacent: + model_adjacent = _model_adjacent_cache_root(source_catalog) + if model_adjacent is not None: + roots.append(model_adjacent) + if cache_root is not None and (explicit_cache_root or not roots): + roots.append(cache_root) + roots = list(_unique_paths(roots)) + + write_roots: list[Path] = [] + if prefer_model_adjacent: + model_adjacent = _model_adjacent_cache_root(source_catalog) + if model_adjacent is not None and _is_writable_or_creatable(model_adjacent): + write_roots.append(model_adjacent) + if cache_root is not None and (explicit_cache_root or not write_roots): + write_roots.append(cache_root) + write_roots = list(_unique_paths(write_roots)) + + debug_output_dir = _optional_path(fields.get("debug_output_dir")) + 
return RecipeBuildCacheConfig( + cache_dirs=tuple(str(root / "trace_plans") for root in roots), + trace_write_dirs=tuple(str(root / "trace_plans") for root in write_roots), + recipe_cache_dirs=tuple(str(root / "compiled_recipes") for root in roots), + recipe_cache_write_dirs=tuple( + str(root / "compiled_recipes") for root in write_roots + ), + debug_output_dir=debug_output_dir, + allow_cache=_optional_bool(fields, "allow_cache", True), + allow_recipe_cache=_optional_bool(fields, "allow_recipe_cache", True), + allow_trace=_optional_bool(fields, "allow_trace", True), + trace_tp_slices=_optional_bool(fields, "trace_tp_slices", True), + debug_dump_trace=_optional_bool(fields, "debug_dump_trace", False), + synchronous_cache_write=_optional_bool( + fields, "synchronous_cache_write", False + ), + synchronous_recipe_cache_write=_optional_bool( + fields, "synchronous_recipe_cache_write", False + ), + ) + + DEFAULT_RECIPE_BUILD_MEMORY_CACHE_ENTRIES = 128 @@ -176,6 +285,119 @@ class RecipeBuildRunResult: diagnostics: dict[str, Any] +@dataclass(frozen=True) +class RecipeBuildSessionRequest: + source_subject: Any | None = None + framework_config: Any | None = None + model_config: Any | None = None + placement: Any | None = None + cache_config: Any | None = None + identity: RuntimeBindingPlan | None = None + trace_cache_schema_version: int | None = None + tp_rank: int | None = None + tp_world_size: int | None = None + + +@dataclass(frozen=True) +class RecipeBuildResult: + session: "RecipeBuildSession" + recipe: Any | None = None + diagnostics: Mapping[str, Any] | None = None + + +def _adapter_text( + adapter: Any | None, + method_name: str, + *args: Any, +) -> str: + method = getattr(adapter, method_name, None) + if callable(method): + return str(method(*args)) + return "" + + +def recipe_build_identity( + request: RecipeBuildSessionRequest, + *, + adapter: Any | None, + placement: Any | None, +) -> RuntimeBindingPlan: + model_config = request.model_config + if model_config is 
None: + raise ValueError("RecipeBuildSessionRequest requires model_config") + runtime_placement = getattr(placement, "runtime_placement", placement) + member = getattr(runtime_placement, "member", None) + stable_identity_payload = getattr( + runtime_placement, "stable_identity_payload", None + ) + if callable(stable_identity_payload): + placement_payload = stable_identity_payload() + else: + placement_payload = getattr(placement, "identity_payload", None) + if placement_payload is None: + placement_payload = getattr(runtime_placement, "identity_payload", None) + trace_cache_schema_version = request.trace_cache_schema_version + if trace_cache_schema_version is None: + trace_cache_schema_version = getattr( + request.cache_config, + "trace_cache_schema_version", + 1, + ) + tp_rank = request.tp_rank + if tp_rank is None: + tp_rank = getattr(placement, "tp_rank", None) + if tp_rank is None and member is not None: + tp_rank = getattr(member, "member_index", None) + tp_world_size = request.tp_world_size + if tp_world_size is None: + tp_world_size = getattr(placement, "tp_world_size", None) + if tp_world_size is None and member is not None: + tp_world_size = getattr(member, "member_count", None) + compute_hash = getattr(model_config, "compute_hash", None) + framework_version = _adapter_text(adapter, "framework_version") + return RuntimeBindingPlan( + model_hash=str( + compute_hash() + if callable(compute_hash) + else getattr(model_config, "model", "unknown") + ), + model_id=str(getattr(model_config, "model", "unknown")), + model_revision=getattr(model_config, "revision", None), + dtype=str(getattr(model_config, "dtype", "unknown")), + runtime_version=framework_version, + framework_name=_adapter_text(adapter, "framework_name"), + framework_version=framework_version, + adapter_version=_adapter_text(adapter, "adapter_version"), + serving_abi_version=_adapter_text( + adapter, + "serving_abi_version", + model_config, + ), + 
trace_cache_schema_version=int(trace_cache_schema_version), + tp_rank=int(tp_rank or 0), + tp_world_size=int(tp_world_size or 1), + topology_ref=getattr(runtime_placement, "topology", None), + member_ref=member, + placement=placement_payload, + ) + + +def build_recipe_session( + request: RecipeBuildSessionRequest, + *, + adapter: Any | None = None, + placement: Any | None = None, +) -> "RecipeBuildSession": + identity = request.identity + if identity is None: + identity = recipe_build_identity( + request, + adapter=adapter, + placement=placement, + ) + return RecipeBuildSession(identity) + + def _metadata_fingerprint(source_catalog: Any) -> str: return str(getattr(source_catalog, "metadata_fingerprint", "")) @@ -187,7 +409,7 @@ def _cache_config_attr(cache_config: Any, name: str, default: Any) -> Any: class RecipeBuildSession: """Small core-owned shell for stable recipe build cache identity.""" - def __init__(self, identity: ServingBindingPlan) -> None: + def __init__(self, identity: RuntimeBindingPlan) -> None: self.identity = identity def trace_cache_key(self, *, metadata_fingerprint: str) -> str: @@ -226,26 +448,26 @@ def recipe_cache_path( tp_rank=self.identity.tp_rank, ) - def compile_identity(self, *, serving_facts: Any) -> Any: - return ServingBindingPlan( + def compile_identity(self, *, runtime_facts: Any) -> Any: + return RuntimeBindingPlan( model_id=self.identity.model_id, model_revision=self.identity.model_revision, dtype=self.identity.dtype, model_hash=self.identity.model_hash, runtime_version=self.identity.runtime_version, framework_name=getattr( - serving_facts, "framework_name", self.identity.framework_name + runtime_facts, "framework_name", self.identity.framework_name ), adapter_version=getattr( - serving_facts, "adapter_version", self.identity.adapter_version + runtime_facts, "adapter_version", self.identity.adapter_version ), serving_abi_version=getattr( - serving_facts, + runtime_facts, "serving_abi_version", self.identity.serving_abi_version, 
), framework_version=getattr( - serving_facts, "framework_version", self.identity.framework_version + runtime_facts, "framework_version", self.identity.framework_version ), trace_cache_schema_version=self.identity.trace_cache_schema_version, tp_rank=self.identity.tp_rank, @@ -257,7 +479,7 @@ def compile_identity(self, *, serving_facts: Any) -> Any: def compile_recipe(self, *, inputs: Any) -> Any: return self.compile_recipe_from_inputs( - identity=self.compile_identity(serving_facts=inputs.serving_facts), + identity=self.compile_identity(runtime_facts=inputs.runtime_facts), inputs=inputs, ) @@ -270,7 +492,7 @@ def build_recipe( framework_adapter: Any, build_meta_model: Callable[[], Any], cache_config: Any, - is_reserved_serving_tensor_name: Callable[[str], bool], + is_reserved_runtime_tensor_name: Callable[[str], bool], semantic_validation_spec: object | None = None, trace_capture_fn: Callable[[Any, list[str], dict[str, Any]], Any] | None = None, trace_plan_memory_cache: MutableMapping[str, Any] | None = None, @@ -392,15 +614,15 @@ def build_recipe( "meta_model_class": type(meta_model).__name__, }, ) - serving_facts = self.collect_serving_facts( + runtime_facts = self.collect_runtime_facts( meta_model, model_config, framework_adapter, ) tensor_schema = self.collect_tensor_schema( meta_model, - runtime_only_tensor_names=serving_facts.runtime_only_tensor_names, - is_reserved_serving_tensor_name=is_reserved_serving_tensor_name, + runtime_only_tensor_names=runtime_facts.runtime_only_tensor_names, + is_reserved_runtime_tensor_name=is_reserved_runtime_tensor_name, ) resolved_semantic_validation_spec = self.resolve_semantic_validation_spec( meta_model, @@ -413,20 +635,20 @@ def build_recipe( "recipe.collect_model_metadata", { "support_level": getattr( - serving_facts.support_level, "value", serving_facts.support_level + runtime_facts.support_level, "value", runtime_facts.support_level ), "process_after_load_class": getattr( - serving_facts.process_after_load_class, + 
runtime_facts.process_after_load_class, "value", - serving_facts.process_after_load_class, + runtime_facts.process_after_load_class, ), "post_bind_finalize_class": getattr( - serving_facts.post_bind_finalize_class, + runtime_facts.post_bind_finalize_class, "value", - serving_facts.post_bind_finalize_class, + runtime_facts.post_bind_finalize_class, ), "runtime_only_tensor_count": len( - serving_facts.runtime_only_tensor_names + runtime_facts.runtime_only_tensor_names ), "tensor_schema_count": len(tensor_schema), }, @@ -556,7 +778,7 @@ def build_recipe( inputs=self._recipe_compile_inputs( source_catalog=source_catalog, trace_plan=trace_plan, - serving_facts=serving_facts, + runtime_facts=runtime_facts, tensor_schema=tensor_schema, semantic_validation_spec=resolved_semantic_validation_spec, ) @@ -672,16 +894,16 @@ def _recipe_compile_inputs( *, source_catalog: Any, trace_plan: Any, - serving_facts: Any, + runtime_facts: Any, tensor_schema: Any, semantic_validation_spec: Any, ) -> Any: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler return tc_compiler.RecipeCompileInputs( source_catalog=source_catalog, trace_plan=trace_plan, - serving_facts=serving_facts, + runtime_facts=runtime_facts, tensor_schema=tensor_schema, semantic_validation_spec=semantic_validation_spec, ) @@ -773,8 +995,8 @@ def rebind_cached_recipe_template( ) -> Any: from dataclasses import replace - from tensorcast.serving.builder import compiler as tc_compiler - from tensorcast.serving.source_catalog import ( + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler + from tensorcast.artifact_runtime.source import ( resolve_source_artifact_ref, ) @@ -782,12 +1004,12 @@ def rebind_cached_recipe_template( source_catalog.source_artifact_ref ) source_metadata_fingerprint = str(source_catalog.metadata_fingerprint) - identity = self.compile_identity(serving_facts=cached_recipe.serving_facts) + identity = 
self.compile_identity(runtime_facts=cached_recipe.runtime_facts) realization_plan_proto = bytes(cached_recipe.realization_plan_proto or b"") binding_plan = identity.with_compiled_artifacts( source_artifact_ref=source_artifact_ref, source_metadata_fingerprint=source_metadata_fingerprint, - serving_facts=cached_recipe.serving_facts, + runtime_facts=cached_recipe.runtime_facts, trace_plan=cached_recipe.trace_plan, tensor_schema=tuple(cached_recipe.tensor_schema), source_hull=tuple(cached_recipe.source_hull), @@ -813,7 +1035,7 @@ def rebind_cached_recipe_template( identity=binding_plan, source_artifact_ref=source_artifact_ref, source_metadata_fingerprint=source_metadata_fingerprint, - serving_facts=cached_recipe.serving_facts, + runtime_facts=cached_recipe.runtime_facts, tensor_schema=cached_recipe.tensor_schema, semantic_validation_spec=cached_recipe.semantic_validation_spec, ) @@ -839,9 +1061,9 @@ def cached_recipe_matches_context( ): return False if placement is not None: - serving_placement = getattr(placement, "serving_placement", placement) - placement_topology = getattr(serving_placement, "topology", None) - placement_member = getattr(serving_placement, "member", None) + runtime_placement = getattr(placement, "runtime_placement", placement) + placement_topology = getattr(runtime_placement, "topology", None) + placement_member = getattr(runtime_placement, "member", None) recipe_topology = getattr(recipe, "topology_ref", None) recipe_member = getattr(recipe, "member_ref", None) if recipe_topology is not None and recipe_topology != placement_topology: @@ -1076,14 +1298,14 @@ def _worker() -> None: ).start() @staticmethod - def collect_serving_facts( + def collect_runtime_facts( model: Any, model_config: Any, framework_adapter: Any, ) -> Any: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler - return tc_compiler.TensorcastServingFacts( + return tc_compiler.TensorcastRuntimeFacts( 
framework_name=framework_adapter.framework_name(), framework_version=framework_adapter.framework_version(), adapter_version=framework_adapter.adapter_version(), @@ -1105,9 +1327,9 @@ def collect_tensor_schema( model: Any, *, runtime_only_tensor_names: tuple[str, ...], - is_reserved_serving_tensor_name: Any, + is_reserved_runtime_tensor_name: Any, ) -> tuple[Any, ...]: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler excluded = set(runtime_only_tensor_names) entries: list[Any] = [] @@ -1116,7 +1338,7 @@ def collect_tensor_schema( for name, param in model.named_parameters(remove_duplicate=True): if name in excluded: continue - if is_reserved_serving_tensor_name(name): + if is_reserved_runtime_tensor_name(name): raise RuntimeError( f"Model tensor name '{name}' collides with Tensorcast reserved names" ) @@ -1132,7 +1354,7 @@ def collect_tensor_schema( for name, buf in model.named_buffers(remove_duplicate=True): if name in excluded: continue - if is_reserved_serving_tensor_name(name): + if is_reserved_runtime_tensor_name(name): raise RuntimeError( f"Model tensor name '{name}' collides with Tensorcast reserved names" ) @@ -1157,7 +1379,7 @@ def resolve_semantic_validation_spec( framework_adapter: Any, explicit_spec: object | None, ) -> Any: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler if explicit_spec is not None: if isinstance(explicit_spec, tc_compiler.TensorcastSemanticValidationSpec): @@ -1185,7 +1407,7 @@ def trace_plan_summary_fields(trace_plan: Any) -> dict[str, int]: @staticmethod def recipe_summary_fields(recipe: Any) -> dict[str, int]: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler return { "tensor_schema_count": len(recipe.tensor_schema), @@ -1198,61 +1420,66 @@ def recipe_summary_fields(recipe: Any) -> 
dict[str, int]: @staticmethod def load_trace_plan_cache(cache_path: str | None) -> Any: - from tensorcast.serving.builder import trace_cache as tc_trace_cache + import tensorcast.artifact_runtime.recipe.trace_cache as tc_trace_cache return tc_trace_cache.load_trace_plan_cache(cache_path) @staticmethod def write_trace_plan_cache(cache_path: str, trace_plan: Any) -> None: - from tensorcast.serving.builder import trace_cache as tc_trace_cache + import tensorcast.artifact_runtime.recipe.trace_cache as tc_trace_cache tc_trace_cache.write_trace_plan_cache(cache_path, trace_plan) @staticmethod def dump_trace_plan_debug(*args: Any, **kwargs: Any) -> Any: - from tensorcast.serving.builder import trace_cache as tc_trace_cache + import tensorcast.artifact_runtime.recipe.trace_cache as tc_trace_cache return tc_trace_cache.dump_trace_plan_debug(*args, **kwargs) @staticmethod def load_compiled_recipe_cache(cache_path: str | None) -> Any: - from tensorcast.serving.builder import recipe_cache as tc_recipe_cache + import tensorcast.artifact_runtime.recipe.cache as tc_recipe_cache return tc_recipe_cache.load_compiled_recipe_cache(cache_path) @staticmethod def write_compiled_recipe_cache(cache_path: str, recipe: Any) -> None: - from tensorcast.serving.builder import recipe_cache as tc_recipe_cache + import tensorcast.artifact_runtime.recipe.cache as tc_recipe_cache tc_recipe_cache.write_compiled_recipe_cache(cache_path, recipe) @staticmethod def compute_recipe_compile_key(*args: Any, **kwargs: Any) -> str: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler return tc_compiler.compute_recipe_compile_key(*args, **kwargs) @staticmethod def compile_recipe_from_inputs(*args: Any, **kwargs: Any) -> Any: - from tensorcast.serving.builder import compiler as tc_compiler + import tensorcast.artifact_runtime.recipe.compiler as tc_compiler - return tc_compiler.compile_serving_recipe(*args, **kwargs) + return 
tc_compiler.compile_runtime_recipe(*args, **kwargs) __all__ = [ - "ServingBindingPlan", + "RuntimeBindingPlan", "RecipeBuildMemoryCache", "RecipeBuildCacheConfig", + "RecipeBuildResult", "RecipeBuildRunResult", + "RecipeBuildSessionRequest", "RecipeCacheLookupResult", "RecipeCacheWriteResult", "RecipeBuildSession", "COMPILED_RECIPE_MEMORY_CACHE", "DEFAULT_RECIPE_BUILD_MEMORY_CACHE_ENTRIES", "TRACE_PLAN_MEMORY_CACHE", + "build_recipe_session", "compute_recipe_cache_key", "compute_trace_cache_key", "recipe_cache_path", + "recipe_build_cache_config_from_policy", + "recipe_build_identity", "stable_recipe_build_hash", "trace_cache_path", ] diff --git a/tensorcast/artifact_runtime/recipe/builder.py b/tensorcast/artifact_runtime/recipe/builder.py new file mode 100644 index 00000000..05f7db47 --- /dev/null +++ b/tensorcast/artifact_runtime/recipe/builder.py @@ -0,0 +1,47 @@ +# Copyright (c) 2026, TensorCast Team. + +"""Artifact runtime builder primitives for offline publication workflows.""" + +from __future__ import annotations + +from tensorcast.artifact_runtime.locator import ranked_version_key_for_member +from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, + TensorSchemaEntry, +) +from tensorcast.artifact_runtime.recipe.materialization import ( + BindingFinalizeMaterializationResult, + collect_runtime_tensors_from_model, + load_source_tensors_for_recipe, + materialize_binding_finalize_runtime_tensors, + materialize_pure_transform_runtime_tensors, + run_binding_finalize_semantic_validation, + tensorcast_view_slices_from_trace_plan, + validate_binding_finalize_tensor_schema, +) +from tensorcast.artifact_runtime.recipe.publication import ( + complete_pure_transform_recipe_publication, +) +from tensorcast.artifact_runtime.recipe.validation import ( + validate_recipe_for_builder_mode, +) + +LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION = "tensorcast-bootstrap-v1" + + +__all__ = [ + "BindingFinalizeMaterializationResult", + 
"CompiledRuntimeRecipe", + "LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION", + "TensorSchemaEntry", + "collect_runtime_tensors_from_model", + "complete_pure_transform_recipe_publication", + "load_source_tensors_for_recipe", + "materialize_binding_finalize_runtime_tensors", + "materialize_pure_transform_runtime_tensors", + "ranked_version_key_for_member", + "run_binding_finalize_semantic_validation", + "tensorcast_view_slices_from_trace_plan", + "validate_binding_finalize_tensor_schema", + "validate_recipe_for_builder_mode", +] diff --git a/tensorcast/serving/builder/recipe_cache.py b/tensorcast/artifact_runtime/recipe/cache.py similarity index 89% rename from tensorcast/serving/builder/recipe_cache.py rename to tensorcast/artifact_runtime/recipe/cache.py index e7c03487..7010b0db 100644 --- a/tensorcast/serving/builder/recipe_cache.py +++ b/tensorcast/artifact_runtime/recipe/cache.py @@ -1,5 +1,5 @@ # Copyright (c) 2026, TensorCast Team. -"""CompiledServingRecipe cache helpers.""" +"""CompiledRuntimeRecipe cache helpers.""" from __future__ import annotations @@ -10,16 +10,16 @@ from pathlib import Path from typing import Any -from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, +from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, SourceHullEntry, + TensorcastRuntimeFacts, TensorcastSemanticValidationSpec, - TensorcastServingFacts, TensorSchemaEntry, binding_realization_plan_proto_bytes, compiled_recipe_realization_plan_count, ) -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( TracePlan, copy_plan_from_dict, copy_plan_to_dict, @@ -28,15 +28,15 @@ ) from tensorcast.types import ( FinalizeClass, - ServingBindingMemberRef, - ServingSupportLevel, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeSupportLevel, + RuntimeTopologyRef, ) -RECIPE_CACHE_PAYLOAD_VERSION = 5 +RECIPE_CACHE_PAYLOAD_VERSION = 6 -def _serving_facts_to_dict(facts: 
TensorcastServingFacts) -> dict[str, Any]: +def _runtime_facts_to_dict(facts: TensorcastRuntimeFacts) -> dict[str, Any]: return { "framework_name": facts.framework_name, "framework_version": facts.framework_version, @@ -49,8 +49,8 @@ def _serving_facts_to_dict(facts: TensorcastServingFacts) -> dict[str, Any]: } -def _serving_facts_from_dict(data: Mapping[str, Any]) -> TensorcastServingFacts: - return TensorcastServingFacts( +def _runtime_facts_from_dict(data: Mapping[str, Any]) -> TensorcastRuntimeFacts: + return TensorcastRuntimeFacts( framework_name=str(data["framework_name"]), framework_version=( None @@ -59,7 +59,7 @@ def _serving_facts_from_dict(data: Mapping[str, Any]) -> TensorcastServingFacts: ), adapter_version=str(data["adapter_version"]), serving_abi_version=str(data["serving_abi_version"]), - support_level=ServingSupportLevel(str(data["support_level"])), + support_level=RuntimeSupportLevel(str(data["support_level"])), runtime_only_tensor_names=tuple( str(name) for name in data.get("runtime_only_tensor_names", ()) ), @@ -167,18 +167,18 @@ def _pydantic_model_to_dict(value: Any | None) -> dict[str, Any] | None: def _topology_ref_from_dict( data: Mapping[str, Any] | None, -) -> ServingTopologyRef | None: +) -> RuntimeTopologyRef | None: if data is None: return None - return ServingTopologyRef.model_validate(dict(data)) + return RuntimeTopologyRef.model_validate(dict(data)) def _member_ref_from_dict( data: Mapping[str, Any] | None, -) -> ServingBindingMemberRef | None: +) -> RuntimeBindingMemberRef | None: if data is None: return None - return ServingBindingMemberRef.model_validate(dict(data)) + return RuntimeBindingMemberRef.model_validate(dict(data)) def _semantic_validation_spec_to_dict( @@ -199,7 +199,7 @@ def _semantic_validation_spec_from_dict( ) -def compiled_recipe_to_dict(recipe: CompiledServingRecipe) -> dict[str, Any]: +def compiled_recipe_to_dict(recipe: CompiledRuntimeRecipe) -> dict[str, Any]: realization_plan_proto = 
bytes(recipe.realization_plan_proto or b"") if not realization_plan_proto and recipe.realization_plan: realization_plan_proto = binding_realization_plan_proto_bytes( @@ -210,7 +210,7 @@ def compiled_recipe_to_dict(recipe: CompiledServingRecipe) -> dict[str, Any]: "compile_key": recipe.compile_key, "source_artifact_ref": recipe.source_artifact_ref, "source_metadata_fingerprint": recipe.source_metadata_fingerprint, - "serving_facts": _serving_facts_to_dict(recipe.serving_facts), + "runtime_facts": _runtime_facts_to_dict(recipe.runtime_facts), "trace_plan_summary": _trace_plan_summary_to_dict(recipe.trace_plan), "tensor_schema": [ _tensor_schema_to_dict(entry) for entry in recipe.tensor_schema @@ -229,16 +229,16 @@ def compiled_recipe_to_dict(recipe: CompiledServingRecipe) -> dict[str, Any]: } -def compiled_recipe_from_dict(data: Mapping[str, Any]) -> CompiledServingRecipe: +def compiled_recipe_from_dict(data: Mapping[str, Any]) -> CompiledRuntimeRecipe: realization_plan_proto = _bytes_from_base64_payload( data["realization_plan_proto"], field="realization_plan_proto", ) - return CompiledServingRecipe( + return CompiledRuntimeRecipe( compile_key=str(data["compile_key"]), source_artifact_ref=str(data["source_artifact_ref"]), source_metadata_fingerprint=str(data["source_metadata_fingerprint"]), - serving_facts=_serving_facts_from_dict(data["serving_facts"]), + runtime_facts=_runtime_facts_from_dict(data["runtime_facts"]), trace_plan=_trace_plan_summary_from_dict(data["trace_plan_summary"]), tensor_schema=tuple( _tensor_schema_from_dict(entry) for entry in data["tensor_schema"] @@ -263,7 +263,7 @@ def compiled_recipe_from_dict(data: Mapping[str, Any]) -> CompiledServingRecipe: def load_compiled_recipe_cache( cache_path: str | os.PathLike[str] | None, -) -> CompiledServingRecipe | None: +) -> CompiledRuntimeRecipe | None: if not cache_path: return None path = Path(cache_path) @@ -284,7 +284,7 @@ def load_compiled_recipe_cache( def write_compiled_recipe_cache( cache_path: 
str | os.PathLike[str], - recipe: CompiledServingRecipe, + recipe: CompiledRuntimeRecipe, ) -> None: path = Path(cache_path) path.parent.mkdir(parents=True, exist_ok=True) diff --git a/tensorcast/serving/builder/compiler.py b/tensorcast/artifact_runtime/recipe/compiler.py similarity index 83% rename from tensorcast/serving/builder/compiler.py rename to tensorcast/artifact_runtime/recipe/compiler.py index 471d1b92..dd8727f3 100644 --- a/tensorcast/serving/builder/compiler.py +++ b/tensorcast/artifact_runtime/recipe/compiler.py @@ -1,5 +1,5 @@ # Copyright (c) 2026, TensorCast Team. -"""Framework-neutral serving recipe compiler primitives.""" +"""Framework-neutral runtime recipe compiler primitives.""" from __future__ import annotations @@ -14,31 +14,31 @@ from tensorcast.api.store.common import dtype_from_string as store_dtype_from_string from tensorcast.api.store.realization_plan import binding_realization_plan_to_proto from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry -from tensorcast.serving.binding_plan import ServingBindingPlan -from tensorcast.serving.builder.binding_plan import lower_trace_plan_for_realization -from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan -from tensorcast.serving.source_catalog import resolve_source_artifact_ref +from tensorcast.artifact_runtime.binding.plan import lower_trace_plan_for_realization +from tensorcast.artifact_runtime.recipe.identity import RuntimeBindingPlan +from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan +from tensorcast.artifact_runtime.source import resolve_source_artifact_ref from tensorcast.types import ( FinalizeClass, - ServingBindingMemberRef, - ServingSupportLevel, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeSupportLevel, + RuntimeTopologyRef, ) @dataclass(frozen=True) -class TensorcastServingFacts: +class TensorcastRuntimeFacts: framework_name: str adapter_version: str serving_abi_version: str - 
support_level: ServingSupportLevel + support_level: RuntimeSupportLevel runtime_only_tensor_names: tuple[str, ...] process_after_load_class: FinalizeClass post_bind_finalize_class: FinalizeClass framework_version: str | None = None -ServingFacts = TensorcastServingFacts +RuntimeFacts = TensorcastRuntimeFacts @dataclass(frozen=True) @@ -69,20 +69,20 @@ def empty(cls) -> TensorcastSemanticValidationSpec: @dataclass(frozen=True) -class CompiledServingRecipe: +class CompiledRuntimeRecipe: compile_key: str source_artifact_ref: str source_metadata_fingerprint: str - serving_facts: TensorcastServingFacts + runtime_facts: TensorcastRuntimeFacts trace_plan: TracePlan tensor_schema: tuple[TensorSchemaEntry, ...] source_hull: tuple[SourceHullEntry, ...] realization_plan: tuple[BindingRealizationEntry, ...] realization_fallback_plan: tuple[CopyPlanEntry, ...] - topology_ref: ServingTopologyRef | None - member_ref: ServingBindingMemberRef | None + topology_ref: RuntimeTopologyRef | None + member_ref: RuntimeBindingMemberRef | None semantic_validation_spec: TensorcastSemanticValidationSpec - binding_plan: ServingBindingPlan | None = None + binding_plan: RuntimeBindingPlan | None = None realization_plan_proto: bytes = b"" realization_plan_count: int = 0 @@ -91,26 +91,26 @@ class CompiledServingRecipe: class RecipeCompileInputs: source_catalog: Any trace_plan: TracePlan - serving_facts: TensorcastServingFacts + runtime_facts: TensorcastRuntimeFacts tensor_schema: tuple[TensorSchemaEntry, ...] semantic_validation_spec: TensorcastSemanticValidationSpec = field( default_factory=TensorcastSemanticValidationSpec.empty ) -class ServingBuildObserver(Protocol): +class RuntimeBuildObserver(Protocol): def event(self, name: str, payload: Mapping[str, object]) -> None: ... 
-def compile_serving_recipe( +def compile_runtime_recipe( *, - identity: ServingBindingPlan, + identity: RuntimeBindingPlan, inputs: RecipeCompileInputs, - observer: ServingBuildObserver | None = None, -) -> CompiledServingRecipe: - """Assemble a serving recipe from framework-collected pure inputs.""" + observer: RuntimeBuildObserver | None = None, +) -> CompiledRuntimeRecipe: + """Assemble a runtime recipe from framework-collected pure inputs.""" - _validate_compile_identity_matches_facts(identity, inputs.serving_facts) + _validate_compile_identity_matches_facts(identity, inputs.runtime_facts) source_artifact_ref = resolve_source_artifact_ref( inputs.source_catalog.source_artifact_ref ) @@ -144,7 +144,7 @@ def compile_serving_recipe( resolved_plan = identity.with_compiled_artifacts( source_artifact_ref=source_artifact_ref, source_metadata_fingerprint=source_metadata_fingerprint, - serving_facts=inputs.serving_facts, + runtime_facts=inputs.runtime_facts, trace_plan=inputs.trace_plan, tensor_schema=tuple(tensor_schema), source_hull=source_hull, @@ -161,15 +161,15 @@ def compile_serving_recipe( identity=resolved_plan, source_artifact_ref=source_artifact_ref, source_metadata_fingerprint=source_metadata_fingerprint, - serving_facts=inputs.serving_facts, + runtime_facts=inputs.runtime_facts, tensor_schema=tensor_schema, semantic_validation_spec=inputs.semantic_validation_spec, ) - recipe = CompiledServingRecipe( + recipe = CompiledRuntimeRecipe( compile_key=compile_key, source_artifact_ref=source_artifact_ref, source_metadata_fingerprint=source_metadata_fingerprint, - serving_facts=inputs.serving_facts, + runtime_facts=inputs.runtime_facts, trace_plan=inputs.trace_plan, tensor_schema=tensor_schema, source_hull=source_hull, @@ -255,7 +255,7 @@ def realization_plan_digest(realization_plan_proto: bytes) -> str: def compiled_recipe_realization_plan_count( - recipe: CompiledServingRecipe, + recipe: CompiledRuntimeRecipe, ) -> int: return int(recipe.realization_plan_count or 
len(recipe.realization_plan)) @@ -269,34 +269,34 @@ def filter_tensor_schema_for_trace_plan( missing = expected - set(schema_by_name) if missing: raise ValueError( - "TensorCast serving recipe tensor_schema is missing destination " + "TensorCast runtime recipe tensor_schema is missing destination " f"entries: {sorted(missing)}" ) return tuple(entry for entry in tensor_schema if entry.name in expected) def _validate_compile_identity_matches_facts( - identity: ServingBindingPlan, - serving_facts: TensorcastServingFacts, + identity: RuntimeBindingPlan, + runtime_facts: TensorcastRuntimeFacts, ) -> None: mismatches = [ field_name for field_name, identity_value, facts_value in ( - ("framework_name", identity.framework_name, serving_facts.framework_name), + ("framework_name", identity.framework_name, runtime_facts.framework_name), ( "framework_version", identity.framework_version, - serving_facts.framework_version, + runtime_facts.framework_version, ), ( "adapter_version", identity.adapter_version, - serving_facts.adapter_version, + runtime_facts.adapter_version, ), ( "serving_abi_version", identity.serving_abi_version, - serving_facts.serving_abi_version, + runtime_facts.serving_abi_version, ), ) if identity_value is not None @@ -305,24 +305,24 @@ def _validate_compile_identity_matches_facts( ] if mismatches: raise ValueError( - "ServingBindingPlan must match TensorcastServingFacts for " + "RuntimeBindingPlan must match TensorcastRuntimeFacts for " f"{', '.join(mismatches)}" ) def compute_recipe_compile_key( *, - identity: ServingBindingPlan, + identity: RuntimeBindingPlan, source_artifact_ref: str, source_metadata_fingerprint: str, - serving_facts: TensorcastServingFacts, + runtime_facts: TensorcastRuntimeFacts, tensor_schema: Sequence[TensorSchemaEntry], semantic_validation_spec: TensorcastSemanticValidationSpec, ) -> str: payload = identity.compile_payload( source_artifact_ref=source_artifact_ref, source_metadata_fingerprint=source_metadata_fingerprint, - 
serving_facts=serving_facts, + runtime_facts=runtime_facts, tensor_schema=tensor_schema, semantic_validation_spec=semantic_validation_spec, ) @@ -332,18 +332,18 @@ def compute_recipe_compile_key( __all__ = [ - "CompiledServingRecipe", + "CompiledRuntimeRecipe", "RecipeCompileInputs", "SemanticValidationSpec", - "ServingBuildObserver", - "ServingBindingPlan", - "ServingFacts", + "RuntimeBuildObserver", + "RuntimeBindingPlan", + "RuntimeFacts", "SourceHullEntry", "TensorSchemaEntry", "TensorcastSemanticValidationSpec", - "TensorcastServingFacts", + "TensorcastRuntimeFacts", "binding_realization_plan_proto_bytes", - "compile_serving_recipe", + "compile_runtime_recipe", "compiled_recipe_realization_plan_count", "compute_recipe_compile_key", "filter_tensor_schema_for_trace_plan", diff --git a/tensorcast/serving/binding_plan.py b/tensorcast/artifact_runtime/recipe/identity.py similarity index 90% rename from tensorcast/serving/binding_plan.py rename to tensorcast/artifact_runtime/recipe/identity.py index 20d7d321..c811d30d 100644 --- a/tensorcast/serving/binding_plan.py +++ b/tensorcast/artifact_runtime/recipe/identity.py @@ -1,5 +1,5 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Serving binding plan identity shared by trace and recipe compilation.""" +"""Runtime recipe identity shared by trace and recipe compilation.""" from __future__ import annotations @@ -7,12 +7,12 @@ from dataclasses import asdict, dataclass, field, is_dataclass, replace from typing import Any -from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef +from tensorcast.types import RuntimeBindingMemberRef, RuntimeTopologyRef @dataclass(frozen=True) -class ServingBindingPlan: - """Cache and correctness identity for serving source bootstrap.""" +class RuntimeBindingPlan: + """Cache and correctness identity for runtime source bootstrap.""" model_id: str model_revision: str | None @@ -26,8 +26,8 @@ class ServingBindingPlan: framework_version: str | None = None tp_rank: int = 0 tp_world_size: int = 1 - topology_ref: ServingTopologyRef | None = None - member_ref: ServingBindingMemberRef | None = None + topology_ref: RuntimeTopologyRef | None = None + member_ref: RuntimeBindingMemberRef | None = None placement: Any | None = None source_artifact_ref: str | None = None source_metadata_fingerprint: str | None = None @@ -42,7 +42,7 @@ class ServingBindingPlan: target_layout_hash: str | None = None tensor_schema_hash: str | None = None resolved_spec_digest: str | None = None - serving_facts: Any | None = None + runtime_facts: Any | None = None trace_plan: Any | None = None tensor_schema: tuple[Any, ...] = () source_hull: tuple[Any, ...] 
= () @@ -77,7 +77,7 @@ def base_payload(self) -> dict[str, Any]: def with_resolved_spec_cache_entry( self, resolved_spec_cache_entry: Any, - ) -> "ServingBindingPlan": + ) -> "RuntimeBindingPlan": return replace( self, resolved_spec_cache_entry=resolved_spec_cache_entry, @@ -152,7 +152,7 @@ def compile_payload( *, source_artifact_ref: str, source_metadata_fingerprint: str, - serving_facts: Any, + runtime_facts: Any, tensor_schema: Any, semantic_validation_spec: Any, ) -> dict[str, Any]: @@ -163,20 +163,20 @@ def compile_payload( payload.update( { "runtime_version": self.runtime_version, - "framework_name": serving_facts.framework_name, - "framework_version": serving_facts.framework_version, - "adapter_version": serving_facts.adapter_version, - "serving_abi_version": serving_facts.serving_abi_version, + "framework_name": runtime_facts.framework_name, + "framework_version": runtime_facts.framework_version, + "adapter_version": runtime_facts.adapter_version, + "serving_abi_version": runtime_facts.serving_abi_version, "identity_framework_name": self.framework_name, "identity_framework_version": self.framework_version, "identity_adapter_version": self.adapter_version, "identity_serving_abi_version": self.serving_abi_version, - "support_level": str(serving_facts.support_level), + "support_level": str(runtime_facts.support_level), "runtime_only_tensor_names": list( - serving_facts.runtime_only_tensor_names + runtime_facts.runtime_only_tensor_names ), - "process_after_load_class": str(serving_facts.process_after_load_class), - "post_bind_finalize_class": str(serving_facts.post_bind_finalize_class), + "process_after_load_class": str(runtime_facts.process_after_load_class), + "post_bind_finalize_class": str(runtime_facts.post_bind_finalize_class), "tensor_schema": [ { "name": item.name, @@ -200,7 +200,7 @@ def with_compiled_artifacts( *, source_artifact_ref: str, source_metadata_fingerprint: str, - serving_facts: Any, + runtime_facts: Any, trace_plan: Any, tensor_schema: 
tuple[Any, ...], source_hull: tuple[Any, ...], @@ -213,13 +213,13 @@ def with_compiled_artifacts( tensor_schema_hash: str | None = None, realization_plan_digest: str | None = None, resolved_spec_cache_entry: Any | None = None, - ) -> "ServingBindingPlan": + ) -> "RuntimeBindingPlan": return replace( self, source_artifact_ref=str(source_artifact_ref), source_metadata_fingerprint=str(source_metadata_fingerprint), source_schema_hash=_optional_str(source_schema_hash), - serving_facts=serving_facts, + runtime_facts=runtime_facts, trace_plan=trace_plan, tensor_schema=tuple(tensor_schema), tensor_schema_hash=_optional_str(tensor_schema_hash), @@ -248,7 +248,7 @@ def compiled_artifact_payload(self) -> dict[str, Any]: "target_layout_hash": self.target_layout_hash, "tensor_schema_hash": self.tensor_schema_hash, "resolved_spec_digest": self.resolved_spec_digest, - "serving_facts": _jsonable(self.serving_facts), + "runtime_facts": _jsonable(self.runtime_facts), "trace_plan": _jsonable(self.trace_plan), "tensor_schema": _jsonable(self.tensor_schema), "source_hull": _jsonable(self.source_hull), @@ -288,7 +288,7 @@ def _optional_str(value: Any | None) -> str | None: return text or None -def _optional_identity_payload(plan: ServingBindingPlan) -> dict[str, Any]: +def _optional_identity_payload(plan: RuntimeBindingPlan) -> dict[str, Any]: payload: dict[str, Any] = {} for field_name in ( "source_schema_hash", @@ -313,4 +313,4 @@ def _optional_identity_payload(plan: ServingBindingPlan) -> dict[str, Any]: return payload -__all__ = ["ServingBindingPlan"] +__all__ = ["RuntimeBindingPlan"] diff --git a/tensorcast/serving/local_ready.py b/tensorcast/artifact_runtime/recipe/local_ready.py similarity index 78% rename from tensorcast/serving/local_ready.py rename to tensorcast/artifact_runtime/recipe/local_ready.py index cc33eaee..14c5ec2d 100644 --- a/tensorcast/serving/local_ready.py +++ b/tensorcast/artifact_runtime/recipe/local_ready.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast 
Team. -"""Core local-ready serving helpers used by framework integrations.""" +"""Core local-ready runtime binding helpers used by framework integrations.""" from __future__ import annotations @@ -12,25 +12,30 @@ import torch import tensorcast as tc +import tensorcast.artifact_runtime.contract as tc_contract +import tensorcast.artifact_runtime.recipe.materialization as tc_materialization +import tensorcast.artifact_runtime.recipe.publication as tc_publication from tensorcast.api.store import create_binding as create_tensorcast_binding from tensorcast.api.store.owned_binding_layout import ( build_mapped_tensor_spec, build_owned_layout, ) -from tensorcast.api.store.serving_builder import prepare_serving_manifest_carrier +from tensorcast.api.store.publication_builder import ( + prepare_runtime_artifact_manifest_carrier, +) from tensorcast.api.store.types import CanonicalIndexEntry -from tensorcast.proto.daemon.v2 import store_daemon_pb2 -from tensorcast.serving.builder import materialization as tc_materialization -from tensorcast.serving.builder import publication as tc_publication -from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, +from tensorcast.artifact_runtime.publication.context import ( + logical_topology_json_from_recipe, + publication_context_from_recipe, +) +from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, TensorSchemaEntry, ) -from tensorcast.serving.builder.compiler import ( +from tensorcast.artifact_runtime.recipe.compiler import ( compiled_recipe_realization_plan_count as _compiled_recipe_realization_plan_count, ) -from tensorcast.serving.contract import logical_topology_json -from tensorcast.types import ServingTopologyRef +from tensorcast.proto.daemon.v2 import store_daemon_pb2 LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION = "tensorcast-bootstrap-v1" _LOGGER = logging.getLogger(__name__) @@ -46,7 +51,7 @@ class LocalReadyBindingRealizationResult: def materialized_tensor_schema( - recipe: 
CompiledServingRecipe, + recipe: CompiledRuntimeRecipe, ) -> tuple[TensorSchemaEntry, ...]: expected_names = set(recipe.trace_plan.expected_dst_names) return tuple( @@ -54,8 +59,8 @@ def materialized_tensor_schema( ) -def serving_binding_tensor_schema( - recipe: CompiledServingRecipe, +def runtime_binding_tensor_schema( + recipe: CompiledRuntimeRecipe, *, manifest_tensor_name: str, manifest_bytes: bytes | None = None, @@ -99,7 +104,7 @@ def canonical_index_entries_from_tensor_schema( return tuple(entries) -def canonical_index_from_recipe(recipe: CompiledServingRecipe) -> tc.CanonicalIndex: +def canonical_index_from_recipe(recipe: CompiledRuntimeRecipe) -> tc.CanonicalIndex: entries = canonical_index_entries_from_tensor_schema( materialized_tensor_schema(recipe) ) @@ -111,44 +116,8 @@ def canonical_index_from_recipe(recipe: CompiledServingRecipe) -> tc.CanonicalIn ) -def logical_topology_json_from_recipe( - recipe: CompiledServingRecipe, - *, - topology: ServingTopologyRef | None = None, - framework_payload: dict[str, Any] | None = None, -) -> str | None: - if topology is None: - if ( - getattr(recipe, "topology_ref", None) is None - and getattr(recipe, "member_ref", None) is None - ): - return None - raise ValueError( - "TensorCast local-ready manifest requires ServingTopologyRef for " - "a topology-sensitive recipe" - ) - return logical_topology_json( - topology, - framework_payload=framework_payload or {}, - ) - - -def publication_context_from_recipe( - recipe: CompiledServingRecipe, - *, - logical_topology_json_payload: str | None = None, -) -> tc_publication.RecipePublicationContext: - return tc_publication.RecipePublicationContext( - source_artifact_ref=recipe.source_artifact_ref, - framework_name=recipe.serving_facts.framework_name, - adapter_version=recipe.serving_facts.adapter_version, - serving_abi_version=recipe.serving_facts.serving_abi_version, - logical_topology_json=logical_topology_json_payload, - ) - - def prepare_same_binding_manifest_carrier( - 
recipe: CompiledServingRecipe, + recipe: CompiledRuntimeRecipe, *, manifest_tensor_name: str, representation_contract_hash: str, @@ -162,7 +131,7 @@ def prepare_same_binding_manifest_carrier( logical_topology_json_payload=logical_topology_json_payload, ) if ( - recipe.serving_facts.process_after_load_class + recipe.runtime_facts.process_after_load_class == tc.FinalizeClass.REPRESENTATION_CHANGING ): build_intent = tc_publication.build_binding_finalize_build_intent( @@ -176,7 +145,7 @@ def prepare_same_binding_manifest_carrier( build_pipeline_version=build_pipeline_version, representation_contract_hash=representation_contract_hash, ) - carrier = prepare_serving_manifest_carrier( + carrier = prepare_runtime_artifact_manifest_carrier( build_intent=build_intent, canonical_index=base_canonical_index, representation_contract_hash=representation_contract_hash, @@ -187,14 +156,14 @@ def prepare_same_binding_manifest_carrier( return representation_contract_hash, carrier.serving_manifest_bytes -def compute_serving_binding_tensor_schema_hash( - recipe: CompiledServingRecipe, +def compute_runtime_binding_tensor_schema_hash( + recipe: CompiledRuntimeRecipe, *, manifest_tensor_name: str, manifest_bytes: bytes | None = None, ) -> str: entries = canonical_index_entries_from_tensor_schema( - serving_binding_tensor_schema( + runtime_binding_tensor_schema( recipe, manifest_tensor_name=manifest_tensor_name, manifest_bytes=manifest_bytes, @@ -205,7 +174,7 @@ def compute_serving_binding_tensor_schema_hash( total_size_bytes=sum(int(entry.size_bytes) for entry in entries), avbs_hash="", ) - return tc.compute_serving_tensor_schema_hash( + return tc_contract.compute_canonical_runtime_tensor_schema_hash( canonical_index, manifest_tensor_name=manifest_tensor_name, ) @@ -227,7 +196,7 @@ def realization_plan_proto_with_manifest( def build_binding_layout_for_recipe( - recipe: CompiledServingRecipe, + recipe: CompiledRuntimeRecipe, *, target_device: torch.device, manifest_tensor_name: str, @@ 
-238,7 +207,7 @@ def build_binding_layout_for_recipe( raise RuntimeError( "Tensorcast bootstrap requires an explicit CUDA device index" ) - tensor_schema = serving_binding_tensor_schema( + tensor_schema = runtime_binding_tensor_schema( recipe, manifest_tensor_name=manifest_tensor_name, manifest_bytes=manifest_bytes, @@ -295,7 +264,7 @@ def _close_binding_after_failure(binding: Any, *, phase: str) -> None: def realize_local_ready_binding_from_source( *, - recipe: CompiledServingRecipe, + recipe: CompiledRuntimeRecipe, source_subject: Any, target_device: torch.device, manifest_tensor_name: str, @@ -338,27 +307,6 @@ def realize_local_ready_binding_from_source( ) -def prepare_local_ready_serving( - *, - recipe: CompiledServingRecipe, - source_subject: Any, - target_device: torch.device, - manifest_tensor_name: str, - manifest_bytes: bytes | None, - options: Any | None, - binding_factory: Callable[..., Any] | None = None, -) -> LocalReadyBindingRealizationResult: - return realize_local_ready_binding_from_source( - recipe=recipe, - source_subject=source_subject, - target_device=target_device, - manifest_tensor_name=manifest_tensor_name, - manifest_bytes=manifest_bytes, - options=options, - binding_factory=binding_factory, - ) - - def freeze_local_ready_binding( *, binding: Any, @@ -375,7 +323,7 @@ def freeze_local_ready_binding( raise -def source_view_for_recipe(recipe: CompiledServingRecipe, source_subject: Any) -> Any: +def source_view_for_recipe(recipe: CompiledRuntimeRecipe, source_subject: Any) -> Any: source_view = source_subject if not isinstance(source_subject, tc.PublicDiskSourceHandle): subset_fn = getattr(source_subject, "subset", None) @@ -401,13 +349,13 @@ def source_view_for_recipe(recipe: CompiledServingRecipe, source_subject: Any) - return source_view -def tensorcast_view_slice_count(recipe: CompiledServingRecipe) -> int: +def tensorcast_view_slice_count(recipe: CompiledRuntimeRecipe) -> int: return len( 
tc_materialization.tensorcast_view_slices_from_trace_plan(recipe.trace_plan) ) -def compiled_recipe_realization_plan_count(recipe: CompiledServingRecipe) -> int: +def compiled_recipe_realization_plan_count(recipe: CompiledRuntimeRecipe) -> int: return _compiled_recipe_realization_plan_count(recipe) @@ -434,7 +382,7 @@ def binding_value_verification_state_name(value: Any) -> str: "binding_value_verification_state_name", "canonical_index_entries_from_tensor_schema", "canonical_index_from_recipe", - "compute_serving_binding_tensor_schema_hash", + "compute_runtime_binding_tensor_schema_hash", "create_local_ready_binding", "compiled_recipe_realization_plan_count", "freeze_local_ready_binding", @@ -442,12 +390,11 @@ def binding_value_verification_state_name(value: Any) -> str: "LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION", "LocalReadyBindingRealizationResult", "materialized_tensor_schema", - "prepare_local_ready_serving", "prepare_same_binding_manifest_carrier", "publication_context_from_recipe", "realization_plan_proto_with_manifest", "realize_local_ready_binding_from_source", - "serving_binding_tensor_schema", + "runtime_binding_tensor_schema", "source_view_for_recipe", "tensorcast_view_slice_count", ] diff --git a/tensorcast/serving/builder/materialization.py b/tensorcast/artifact_runtime/recipe/materialization.py similarity index 90% rename from tensorcast/serving/builder/materialization.py rename to tensorcast/artifact_runtime/recipe/materialization.py index 435fcf4b..bbce2882 100644 --- a/tensorcast/serving/builder/materialization.py +++ b/tensorcast/artifact_runtime/recipe/materialization.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Framework-neutral tensor-dict materialization helpers.""" +"""Framework-neutral runtime tensor-dict materialization helpers.""" from __future__ import annotations @@ -11,33 +11,33 @@ import torch from torch import nn -from tensorcast.pytorch.module_binding import ( - attach_tensors_to_module, - collect_module_tensors, -) -from tensorcast.serving.builder.recipe_validation import ( - validate_recipe_for_builder_mode, -) -from tensorcast.serving.builder.semantic_validation import ( +from tensorcast.artifact_runtime.recipe.semantic_validation import ( evaluate_semantic_validation_spec, ) -from tensorcast.serving.builder.tensor_schema import ( +from tensorcast.artifact_runtime.recipe.tensor_schema import ( validate_tensor_schema_against_tensors, ) -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, RangeSpec, TracePlan, ) +from tensorcast.artifact_runtime.recipe.validation import ( + validate_recipe_for_builder_mode, +) +from tensorcast.pytorch.module_binding import ( + attach_tensors_to_module, + collect_module_tensors, +) from tensorcast.types import BuilderMode @dataclass(frozen=True) class BindingFinalizeMaterializationResult: model: nn.Module - serving_tensors: dict[str, torch.Tensor] + runtime_tensors: dict[str, torch.Tensor] semantic_probe_result: Any = None @@ -132,12 +132,12 @@ def narrow_source_view( def apply_copy_plan( trace_plan: TracePlan, source_tensors: Mapping[str, torch.Tensor], - serving_tensors: Mapping[str, torch.Tensor], + runtime_tensors: Mapping[str, torch.Tensor], *, entries: Iterable[CopyPlanEntry] | None = None, ) -> None: for entry in trace_plan.copy_plan if entries is None else entries: - dst_base = serving_tensors.get(entry.dst_name) + dst_base = runtime_tensors.get(entry.dst_name) if dst_base is None: raise RuntimeError(f"Missing destination tensor {entry.dst_name}") dst_view = ( @@ -239,10 +239,10 @@ def update_dst_coverage( def 
validate_dst_coverage( trace_plan: TracePlan, - serving_tensors: Mapping[str, torch.Tensor], + runtime_tensors: Mapping[str, torch.Tensor], ) -> None: expected = set(trace_plan.expected_dst_names) - provided = set(serving_tensors) + provided = set(runtime_tensors) missing = expected - provided unexpected = provided - expected if missing or unexpected: @@ -253,7 +253,7 @@ def validate_dst_coverage( coverage: dict[str, dict[str, Any]] = {} for entry in trace_plan.copy_plan: - dst_base = serving_tensors.get(entry.dst_name) + dst_base = runtime_tensors.get(entry.dst_name) if dst_base is None: continue update_dst_coverage(coverage, entry, dst_base) @@ -297,7 +297,7 @@ def load_source_tensors_for_recipe( } -def materialize_pure_transform_serving_tensors( +def materialize_pure_transform_runtime_tensors( recipe: Any, source_tensors: Mapping[str, torch.Tensor], *, @@ -321,7 +321,7 @@ def materialize_recipe_copy_plan_tensors( str(name): tensor for name, tensor in dict(source_tensors).items() } validate_source_tensor_names(recipe.trace_plan, resolved_source_tensors) - serving_tensors = allocate_tensors_from_schema( + runtime_tensors = allocate_tensors_from_schema( recipe.tensor_schema, target_device=torch.device(target_device), ) @@ -329,13 +329,13 @@ def materialize_recipe_copy_plan_tensors( apply_copy_plan( recipe.trace_plan, resolved_source_tensors, - serving_tensors, + runtime_tensors, ) - validate_dst_coverage(recipe.trace_plan, serving_tensors) - return serving_tensors + validate_dst_coverage(recipe.trace_plan, runtime_tensors) + return runtime_tensors -def materialize_binding_finalize_serving_tensors( +def materialize_binding_finalize_runtime_tensors( recipe: Any, source_tensors: Mapping[str, torch.Tensor], *, @@ -346,7 +346,7 @@ def materialize_binding_finalize_serving_tensors( ) -> BindingFinalizeMaterializationResult: validate_recipe_for_builder_mode(recipe, BuilderMode.BINDING_FINALIZE) resolved_target_device = torch.device(target_device) - serving_tensors = 
materialize_recipe_copy_plan_tensors( + runtime_tensors = materialize_recipe_copy_plan_tensors( recipe, source_tensors, target_device=resolved_target_device, @@ -354,7 +354,7 @@ def materialize_binding_finalize_serving_tensors( model = build_runtime_model(resolved_target_device) attach_tensors_to_module( model, - serving_tensors, + runtime_tensors, replace_meta_params=True, skip_reserved_tensor_names=True, preserve_aliases=True, @@ -372,14 +372,14 @@ def materialize_binding_finalize_serving_tensors( model_config=model_config, framework_adapter=framework_adapter, ) - finalized_tensors = collect_serving_tensors_from_model( + finalized_tensors = collect_runtime_tensors_from_model( model, - runtime_only_tensor_names=recipe.serving_facts.runtime_only_tensor_names, + runtime_only_tensor_names=recipe.runtime_facts.runtime_only_tensor_names, ) validate_tensor_schema_against_tensors(recipe.tensor_schema, finalized_tensors) return BindingFinalizeMaterializationResult( model=model, - serving_tensors=finalized_tensors, + runtime_tensors=finalized_tensors, semantic_probe_result=semantic_probe_result, ) @@ -400,7 +400,7 @@ def run_binding_finalize_semantic_validation( ) -def collect_serving_tensors_from_model( +def collect_runtime_tensors_from_model( model: nn.Module, *, runtime_only_tensor_names: Sequence[str], @@ -424,12 +424,12 @@ def validate_binding_finalize_tensor_schema( "BindingFinalizeMaterializationResult", "allocate_tensors_from_schema", "apply_copy_plan", - "collect_serving_tensors_from_model", + "collect_runtime_tensors_from_model", "dtype_from_string", "iter_ranges", "load_source_tensors_for_recipe", - "materialize_binding_finalize_serving_tensors", - "materialize_pure_transform_serving_tensors", + "materialize_binding_finalize_runtime_tensors", + "materialize_pure_transform_runtime_tensors", "materialize_recipe_copy_plan_tensors", "narrow_by_range_spec", "narrow_source_view", diff --git a/tensorcast/serving/builder/publication.py 
b/tensorcast/artifact_runtime/recipe/publication.py similarity index 77% rename from tensorcast/serving/builder/publication.py rename to tensorcast/artifact_runtime/recipe/publication.py index 125d1f06..da67f096 100644 --- a/tensorcast/serving/builder/publication.py +++ b/tensorcast/artifact_runtime/recipe/publication.py @@ -1,100 +1,48 @@ # Copyright (c) 2026, TensorCast Team. -"""Recipe-oriented serving publication helpers.""" +"""Recipe-oriented runtime publication helpers.""" from __future__ import annotations from collections.abc import Mapping -from dataclasses import dataclass from typing import Any import torch import tensorcast as tc from tensorcast.api.store.handles import RegisteredArtifact -from tensorcast.api.store.serving_builder import ( +from tensorcast.api.store.publication_builder import ( build_binding_finalize_admission_facts, build_binding_finalize_publication_bundle, build_pure_transform_publication_bundle_from_registered_artifact, build_pure_transform_publication_spec, - prepare_binding_finalize_serving_registration, - prepare_pure_transform_serving_registration, + prepare_binding_finalize_runtime_registration, + prepare_pure_transform_runtime_registration, ) from tensorcast.api.store.types import CanonicalIndex -from tensorcast.serving.builder.materialization import ( +from tensorcast.artifact_runtime.publication.context import ( + RecipePublicationContext, + build_binding_finalize_build_intent, + build_pure_transform_build_intent, + build_recipe_runtime_build_intent, +) +from tensorcast.artifact_runtime.recipe.materialization import ( load_source_tensors_for_recipe, - materialize_pure_transform_serving_tensors, + materialize_pure_transform_runtime_tensors, ) from tensorcast.types import ( AssemblyReadinessPolicy, AssemblyRequirementSetRef, BindingValueRef, - BuilderMode, PureTransformPublicationSpec, RepresentationPublishSpec, - ServingAdmissionFacts, - ServingBuildIntent, - ServingPublicationSubject, - ServingSupportLevel, + 
RuntimeAdmissionFacts, + RuntimePublicationSubject, + RuntimeSupportLevel, ) -@dataclass(frozen=True) -class RecipePublicationContext: - source_artifact_ref: str - framework_name: str - adapter_version: str - serving_abi_version: str - logical_topology_json: str | None = None - - -def build_recipe_serving_build_intent( - context: RecipePublicationContext, - *, - builder_mode: BuilderMode, - build_pipeline_version: str, - representation_contract_hash: str | None = None, -) -> ServingBuildIntent: - return ServingBuildIntent( - representation_contract_hash=representation_contract_hash, - builder_mode=builder_mode, - framework_name=context.framework_name, - adapter_version=context.adapter_version, - serving_abi_version=context.serving_abi_version, - build_pipeline_version=str(build_pipeline_version), - source_artifact_ref=context.source_artifact_ref, - ) - - -def build_pure_transform_build_intent( - context: RecipePublicationContext, - *, - build_pipeline_version: str, - representation_contract_hash: str | None = None, -) -> ServingBuildIntent: - return build_recipe_serving_build_intent( - context, - builder_mode=BuilderMode.PURE_TRANSFORM, - build_pipeline_version=build_pipeline_version, - representation_contract_hash=representation_contract_hash, - ) - - -def build_binding_finalize_build_intent( - context: RecipePublicationContext, - *, - build_pipeline_version: str, - representation_contract_hash: str, -) -> ServingBuildIntent: - return build_recipe_serving_build_intent( - context, - builder_mode=BuilderMode.BINDING_FINALIZE, - build_pipeline_version=build_pipeline_version, - representation_contract_hash=str(representation_contract_hash), - ) - - -def prepare_pure_transform_serving_registration_from_context( +def prepare_pure_transform_runtime_registration_from_context( context: RecipePublicationContext, *, tensors: Mapping[str, torch.Tensor], @@ -104,7 +52,7 @@ def prepare_pure_transform_serving_registration_from_context( serving_manifest_ref: str | None = None, 
topology_admission_digest: str | None = None, ) -> Any: - return prepare_pure_transform_serving_registration( + return prepare_pure_transform_runtime_registration( build_intent=build_pure_transform_build_intent( context, build_pipeline_version=build_pipeline_version, @@ -186,7 +134,7 @@ def build_pure_transform_publication_bundle_from_context( ) -def prepare_binding_finalize_serving_registration_from_context( +def prepare_binding_finalize_runtime_registration_from_context( context: RecipePublicationContext, *, tensors: dict[str, torch.Tensor], @@ -195,7 +143,7 @@ def prepare_binding_finalize_serving_registration_from_context( serving_manifest_ref: str | None = None, topology_admission_digest: str | None = None, ) -> Any: - return prepare_binding_finalize_serving_registration( + return prepare_binding_finalize_runtime_registration( build_intent=build_binding_finalize_build_intent( context, build_pipeline_version=build_pipeline_version, @@ -211,10 +159,10 @@ def prepare_binding_finalize_serving_registration_from_context( def build_binding_finalize_admission_facts_from_context( *, - support_level: ServingSupportLevel, + support_level: RuntimeSupportLevel, topology_admission_digest: str | None = None, same_binding_fast_path_validated: bool = True, -) -> ServingAdmissionFacts: +) -> RuntimeAdmissionFacts: return build_binding_finalize_admission_facts( support_level=support_level, topology_admission_digest=topology_admission_digest, @@ -225,7 +173,7 @@ def build_binding_finalize_admission_facts_from_context( def build_binding_finalize_publication_bundle_from_context( context: RecipePublicationContext, *, - publication_subject: ServingPublicationSubject | BindingValueRef, + publication_subject: RuntimePublicationSubject | BindingValueRef, canonical_index: CanonicalIndex, build_pipeline_version: str, representation_contract_hash: str, @@ -237,7 +185,7 @@ def build_binding_finalize_publication_bundle_from_context( requirements: AssemblyRequirementSetRef | None = None, 
readiness_policy: AssemblyReadinessPolicy | None = None, structural_view_ids: tuple[str, ...] = (), - admission_facts: ServingAdmissionFacts | None = None, + admission_facts: RuntimeAdmissionFacts | None = None, ) -> RepresentationPublishSpec: if admission_facts is None: raise ValueError( @@ -298,13 +246,13 @@ def complete_pure_transform_recipe_publication( if source_tensors is None else {str(name): tensor for name, tensor in dict(source_tensors).items()} ) - serving_tensors = materialize_pure_transform_serving_tensors( + runtime_tensors = materialize_pure_transform_runtime_tensors( recipe, resolved_source_tensors, target_device=materialization_device, ) return tc.complete_pure_transform_publication( - serving_tensors, + runtime_tensors, build_intent=build_pure_transform_build_intent( publication_context, build_pipeline_version=build_pipeline_version, @@ -342,7 +290,7 @@ def complete_pure_transform_recipe_publication( "build_pure_transform_build_intent", "build_pure_transform_publication_bundle_from_context", "build_pure_transform_publication_spec_from_context", - "build_recipe_serving_build_intent", - "prepare_binding_finalize_serving_registration_from_context", - "prepare_pure_transform_serving_registration_from_context", + "build_recipe_runtime_build_intent", + "prepare_binding_finalize_runtime_registration_from_context", + "prepare_pure_transform_runtime_registration_from_context", ] diff --git a/tensorcast/serving/builder/semantic_validation.py b/tensorcast/artifact_runtime/recipe/semantic_validation.py similarity index 64% rename from tensorcast/serving/builder/semantic_validation.py rename to tensorcast/artifact_runtime/recipe/semantic_validation.py index 6ef6736d..c44a7a58 100644 --- a/tensorcast/serving/builder/semantic_validation.py +++ b/tensorcast/artifact_runtime/recipe/semantic_validation.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Framework-neutral semantic validation helpers for serving recipes.""" +"""Framework-neutral semantic validation helpers for runtime recipes.""" from __future__ import annotations @@ -14,20 +14,36 @@ def evaluate_semantic_validation_spec(spec: Any, actual_payload: Any) -> Any: return None actual = _jsonable(actual_payload) if spec.kind == "framework_semantic_probes": - return actual + return _compare_semantic_payload( + label="framework probe", + expected=_jsonable(spec.payload), + actual=actual, + ) if spec.kind == "explicit": - expected = _jsonable(spec.payload) - if actual != expected: - raise RuntimeError( - "TensorCast semantic validation failed for explicit probe " - f"spec: expected={expected!r}, actual={actual!r}" - ) - return actual + return _compare_semantic_payload( + label="explicit probe", + expected=_jsonable(spec.payload), + actual=actual, + ) raise RuntimeError( f"Unsupported TensorCast semantic validation spec kind: {spec.kind!r}" ) +def _compare_semantic_payload( + *, + label: str, + expected: Any, + actual: Any, +) -> Any: + if actual != expected: + raise RuntimeError( + f"TensorCast semantic validation failed for {label} " + f"spec: expected={expected!r}, actual={actual!r}" + ) + return actual + + def _jsonable(value: Any) -> Any: if value is None or isinstance(value, (str, int, float, bool)): return value diff --git a/tensorcast/serving/builder/tensor_parity.py b/tensorcast/artifact_runtime/recipe/tensor_parity.py similarity index 99% rename from tensorcast/serving/builder/tensor_parity.py rename to tensorcast/artifact_runtime/recipe/tensor_parity.py index c2001c51..6c0cfb7c 100644 --- a/tensorcast/serving/builder/tensor_parity.py +++ b/tensorcast/artifact_runtime/recipe/tensor_parity.py @@ -1,5 +1,5 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Tensor parity diagnostics for TensorCast serving recipes.""" +"""Tensor parity diagnostics for TensorCast runtime recipes.""" from __future__ import annotations @@ -10,18 +10,18 @@ import torch from tensorcast.api.store import BindingRealizationEntry -from tensorcast.proto.daemon.v2 import store_daemon_pb2 -from tensorcast.serving.builder.materialization import ( +from tensorcast.artifact_runtime.recipe.materialization import ( narrow_by_range_spec, narrow_source_view, ) -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, RangeSpec, TracePlan, ) +from tensorcast.proto.daemon.v2 import store_daemon_pb2 class _RangeLike(Protocol): diff --git a/tensorcast/serving/builder/tensor_schema.py b/tensorcast/artifact_runtime/recipe/tensor_schema.py similarity index 92% rename from tensorcast/serving/builder/tensor_schema.py rename to tensorcast/artifact_runtime/recipe/tensor_schema.py index 494a38e3..c58cdbf9 100644 --- a/tensorcast/serving/builder/tensor_schema.py +++ b/tensorcast/artifact_runtime/recipe/tensor_schema.py @@ -5,14 +5,13 @@ from __future__ import annotations from collections.abc import Mapping, Sequence +from typing import Any import torch -from tensorcast.serving.builder.compiler import TensorSchemaEntry - def validate_tensor_schema_against_tensors( - tensor_schema: Sequence[TensorSchemaEntry], + tensor_schema: Sequence[Any], tensors: Mapping[str, torch.Tensor], ) -> None: expected = {entry.name: entry for entry in tensor_schema} @@ -31,7 +30,7 @@ def validate_tensor_schema_against_tensors( def _validate_tensor_schema_entry( name: str, - entry: TensorSchemaEntry, + entry: Any, tensor: torch.Tensor, ) -> None: shape = tuple(int(dim) for dim in tensor.shape) diff --git a/tensorcast/serving/builder/trace_cache.py b/tensorcast/artifact_runtime/recipe/trace_cache.py similarity index 98% rename from tensorcast/serving/builder/trace_cache.py rename to 
tensorcast/artifact_runtime/recipe/trace_cache.py index d86fb3d4..772cfaaf 100644 --- a/tensorcast/serving/builder/trace_cache.py +++ b/tensorcast/artifact_runtime/recipe/trace_cache.py @@ -10,7 +10,7 @@ from pathlib import Path from typing import Any -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( TracePlan, trace_plan_from_dict, trace_plan_to_dict, diff --git a/tensorcast/serving/builder/trace_ir.py b/tensorcast/artifact_runtime/recipe/trace_ir.py similarity index 98% rename from tensorcast/serving/builder/trace_ir.py rename to tensorcast/artifact_runtime/recipe/trace_ir.py index 6e2b133e..dddd0088 100644 --- a/tensorcast/serving/builder/trace_ir.py +++ b/tensorcast/artifact_runtime/recipe/trace_ir.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. -"""Framework-neutral TensorCast serving trace IR.""" +"""Framework-neutral TensorCast runtime trace IR.""" from __future__ import annotations diff --git a/tensorcast/serving/builder/recipe_validation.py b/tensorcast/artifact_runtime/recipe/validation.py similarity index 62% rename from tensorcast/serving/builder/recipe_validation.py rename to tensorcast/artifact_runtime/recipe/validation.py index 4d0f49f9..74bb8b9b 100644 --- a/tensorcast/serving/builder/recipe_validation.py +++ b/tensorcast/artifact_runtime/recipe/validation.py @@ -1,46 +1,28 @@ # Copyright (c) 2026, TensorCast Team. 
-"""Framework-neutral serving recipe fact validation.""" +"""Framework-neutral runtime recipe fact validation.""" from __future__ import annotations from typing import Any -from tensorcast.types import BuilderMode, FinalizeClass, ServingSupportLevel - -_SUPPORT_LEVEL_ORDER = { - ServingSupportLevel.BLOCKED: 0, - ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: 1, - ServingSupportLevel.BUILDER_PUBLICATION_READY: 2, - ServingSupportLevel.RUNTIME_BIND_SWAP_READY: 3, -} - - -def serving_support_level_at_least( - value: ServingSupportLevel | str, - minimum: ServingSupportLevel | str, -) -> bool: - resolved_value = _coerce_support_level(value) - resolved_minimum = _coerce_support_level(minimum) - return ( - _SUPPORT_LEVEL_ORDER[resolved_value] >= _SUPPORT_LEVEL_ORDER[resolved_minimum] - ) - - -def serving_support_level_display_name(value: ServingSupportLevel | str) -> str: - return str(_coerce_support_level(value).value) +from tensorcast.artifact_runtime.readiness import ( + runtime_support_level_at_least, + runtime_support_level_display_name, +) +from tensorcast.types import BuilderMode, FinalizeClass, RuntimeSupportLevel def validate_recipe_for_builder_mode(recipe: Any, mode: BuilderMode | str) -> Any: - facts = recipe.serving_facts + facts = recipe.runtime_facts builder_mode = _coerce_builder_mode(mode) failures: list[str] = [] - if not serving_support_level_at_least( - facts.support_level, ServingSupportLevel.BUILDER_PUBLICATION_READY + if not runtime_support_level_at_least( + facts.support_level, RuntimeSupportLevel.BUILDER_PUBLICATION_READY ): failures.append( "support_level=" - f"{serving_support_level_display_name(facts.support_level)} " + f"{runtime_support_level_display_name(facts.support_level)} " "is below builder_publication_ready" ) if builder_mode == BuilderMode.PURE_TRANSFORM: @@ -77,12 +59,6 @@ def validate_recipe_for_builder_mode(recipe: Any, mode: BuilderMode | str) -> An return recipe -def _coerce_support_level(value: ServingSupportLevel | str) -> 
ServingSupportLevel: - if isinstance(value, ServingSupportLevel): - return value - return ServingSupportLevel(str(value).strip()) - - def _coerce_builder_mode(value: BuilderMode | str) -> BuilderMode: if isinstance(value, BuilderMode): return value @@ -90,7 +66,7 @@ def _coerce_builder_mode(value: BuilderMode | str) -> BuilderMode: __all__ = [ - "serving_support_level_at_least", - "serving_support_level_display_name", + "runtime_support_level_at_least", + "runtime_support_level_display_name", "validate_recipe_for_builder_mode", ] diff --git a/tensorcast/artifact_runtime/reload.py b/tensorcast/artifact_runtime/reload.py new file mode 100644 index 00000000..ada49b5e --- /dev/null +++ b/tensorcast/artifact_runtime/reload.py @@ -0,0 +1,89 @@ +# Copyright (c) 2026, TensorCast Team. +"""Artifact runtime reload actions.""" + +from __future__ import annotations + +from collections.abc import Callable + +from tensorcast.artifact_runtime.artifact.resolver import RuntimeArtifactResolver +from tensorcast.artifact_runtime.attachment import ( + RuntimeAttachment, + RuntimeBindingState, +) +from tensorcast.artifact_runtime.errors import ConfigConflictError +from tensorcast.artifact_runtime.host import RuntimeHostCapabilities +from tensorcast.artifact_runtime.intent import ExistingRuntimeArtifact, RequestContext +from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.policy import ( + RuntimePolicy, + merge_runtime_reload_extra_config, + normalize_runtime_reload_request_payload, +) +from tensorcast.artifact_runtime.publication import replica as replica_publication + + +def _reject_local_reload_artifact_locator(artifact_locator: object) -> None: + if getattr(artifact_locator, "kind", None) == "local_path": + raise ConfigConflictError( + "TensorCast runtime reload requires a durable artifact locator, " + "not a local source selector" + ) + + +def 
reload_runtime_attachment( + *, + current_attachment: RuntimeAttachment | RuntimeBindingState, + artifact_locator: object, + policy: object | None, + runtime_host: RuntimeHostCapabilities, + runtime_context: RequestContext, + ensure_runtime_initialized: Callable[[], None], + model: object | None = None, + contract_identity: str | None = None, + runtime_resolver: RuntimeArtifactResolver | None = None, + profile_sink: object | None = None, +) -> RuntimeAttachment: + """Reload an existing artifact-backed runtime binding.""" + + _reject_local_reload_artifact_locator(artifact_locator) + if not isinstance(artifact_locator, ArtifactLocator): + raise ConfigConflictError( + "TensorCast runtime reload requires an ArtifactLocator" + ) + if policy is not None and not isinstance(policy, RuntimePolicy): + raise ConfigConflictError( + "TensorCast runtime reload requires a RuntimePolicy or None" + ) + if isinstance(current_attachment, RuntimeAttachment): + replica_publication.reject_reload_with_active_publication(current_attachment) + ensure_runtime_initialized() + current_state = ( + current_attachment.state + if isinstance(current_attachment, RuntimeAttachment) + else current_attachment + ) + runtime_model = ( + model if model is not None else getattr(current_attachment, "model", None) + ) + return ArtifactRuntimeIntegration( + resolver=runtime_resolver, + profile_sink=profile_sink, + host=runtime_host, + ).reload( + current_state, + ExistingRuntimeArtifact( + artifact_locator=artifact_locator, + policy=policy, + ), + runtime_context, + model=runtime_model, + contract_identity=contract_identity, + ) + + +__all__ = [ + "merge_runtime_reload_extra_config", + "normalize_runtime_reload_request_payload", + "reload_runtime_attachment", +] diff --git a/tensorcast/artifact_runtime/request_facts.py b/tensorcast/artifact_runtime/request_facts.py new file mode 100644 index 00000000..64da7463 --- /dev/null +++ b/tensorcast/artifact_runtime/request_facts.py @@ -0,0 +1,273 @@ +# Copyright (c) 
2026, TensorCast Team. + +"""Fail-closed request fact resolution for model-runtime realization.""" + +from __future__ import annotations + +import json +from collections.abc import Mapping +from dataclasses import dataclass, replace +from typing import Any + +import torch + +from tensorcast.artifact_runtime.errors import ArtifactRuntimeIntegrationError +from tensorcast.artifact_runtime.intent import RequestContext + + +class ModelRuntimeRequestFactsError(ArtifactRuntimeIntegrationError): + """Raised when model-runtime spec, request, and host facts disagree.""" + + code = "invalid_argument" + operation = "model_runtime_request" + + +@dataclass(frozen=True) +class ResolvedModelRuntimeRequestFacts: + spec: Any + context: Any + + +def resolve_model_runtime_request_facts( + *, + spec: Any, + runtime_context: Any | None, + host_context: Any | None = None, + host_target_device: Any | None = None, +) -> ResolvedModelRuntimeRequestFacts: + """Resolve request facts without silently preferring one authority.""" + + context = runtime_context or RequestContext( + target_device=getattr(spec, "device", None) + ) + spec, context = _resolve_device_fact( + spec=spec, + context=context, + host_target_device=host_target_device, + ) + spec = _resolve_runtime_fact( + spec=spec, + context=context, + host_context=host_context, + field_name="topology", + host_value=_placement_value(host_context, "topology"), + ) + spec = _resolve_runtime_fact( + spec=spec, + context=context, + host_context=host_context, + field_name="member", + host_value=_placement_value(host_context, "member"), + ) + spec = _resolve_runtime_fact( + spec=spec, + context=context, + host_context=host_context, + field_name="adapter_version", + host_value=_optional_text(getattr(host_context, "adapter_version", None)), + ) + spec = _resolve_runtime_fact( + spec=spec, + context=context, + host_context=host_context, + field_name="runtime_abi_version", + context_field_names=("runtime_abi_version", "serving_abi_version"), + 
host_value=_optional_text(getattr(host_context, "serving_abi_version", None)), + ) + return ResolvedModelRuntimeRequestFacts(spec=spec, context=context) + + +def _resolve_device_fact( + *, + spec: Any, + context: Any, + host_target_device: Any | None, +) -> tuple[Any, Any]: + facts = ( + ("spec.device", getattr(spec, "device", None)), + ("runtime_context.target_device", getattr(context, "target_device", None)), + ("host.target_device", host_target_device), + ) + resolved = _single_resolved_value( + facts, + normalize=_normalized_device, + field_name="target_device", + ) + if resolved is None: + return spec, context + if getattr(spec, "device", None) is None: + spec = _replace_field( + spec, + field_name="device", + new_value=resolved, + subject="model_runtime spec", + ) + if getattr(context, "target_device", None) is None: + context = _replace_field( + context, + field_name="target_device", + new_value=resolved, + subject="model_runtime runtime_context", + ) + return spec, context + + +def _resolve_runtime_fact( + *, + spec: Any, + context: Any, + host_context: Any | None, + field_name: str, + host_value: Any | None, + context_field_names: tuple[str, ...] 
| None = None, +) -> Any: + del host_context + context_fields = context_field_names or (field_name,) + context_value = _first_present_attr(context, context_fields) + facts = ( + (f"spec.{field_name}", getattr(spec, field_name, None)), + (f"runtime_context.{field_name}", context_value), + (f"host.{field_name}", host_value), + ) + resolved = _single_resolved_value( + facts, + normalize=lambda value: _normalized_fact(field_name, value), + field_name=field_name, + ) + if resolved is None or getattr(spec, field_name, None) is not None: + return spec + return _replace_field( + spec, + field_name=field_name, + new_value=resolved, + subject="model_runtime spec", + ) + + +def _single_resolved_value( + facts: tuple[tuple[str, Any | None], ...], + *, + normalize: Any, + field_name: str, +) -> Any | None: + present: list[tuple[str, Any, Any]] = [] + for source, value in facts: + if value is None: + continue + normalized = normalize(value) + if normalized is None: + continue + present.append((source, value, normalized)) + if not present: + return None + expected = present[0][2] + mismatches = [ + (source, normalized) + for source, _value, normalized in present[1:] + if normalized != expected + ] + if mismatches: + details = {source: normalized for source, _value, normalized in present} + raise ModelRuntimeRequestFactsError( + f"model_runtime {field_name} facts disagree", + details=details, + ) + return present[0][1] + + +def _replace_field( + obj: Any, + *, + field_name: str, + new_value: Any, + subject: str, +) -> Any: + model_copy = getattr(obj, "model_copy", None) + if callable(model_copy): + return model_copy(update={field_name: new_value}) + try: + return replace(obj, **{field_name: new_value}) + except TypeError as exc: + raise ModelRuntimeRequestFactsError( + f"{subject} must be dataclass-compatible when {field_name} is omitted", + details={"field": field_name}, + ) from exc + + +def _normalized_device(value: Any) -> str: + try: + return str(torch.device(value)) + except 
Exception as exc: # noqa: BLE001 + raise ModelRuntimeRequestFactsError( + f"model_runtime target_device is invalid: {value!r}", + details={"target_device": repr(value)}, + ) from exc + + +def _normalized_fact(field_name: str, value: Any) -> Any | None: + if field_name == "topology": + return _topology_identity(value) + if field_name == "member": + return _member_identity(value) + return _optional_text(value) + + +def _topology_identity(value: Any) -> Any | None: + digest = _optional_text(getattr(value, "schema_topology_digest", None)) + if digest is not None: + return ("schema_topology_digest", digest) + return _stable_value(value) + + +def _member_identity(value: Any) -> Any | None: + member_id = _optional_text(getattr(value, "member_id", None)) + if member_id is not None: + return ( + member_id, + int(getattr(value, "member_index", 0)), + int(getattr(value, "member_count", 1)), + _optional_text(getattr(value, "group_id", None)), + ) + return _stable_value(value) + + +def _stable_value(value: Any) -> Any | None: + if value is None: + return None + dump = getattr(value, "model_dump", None) + if callable(dump): + return _stable_json(dump(mode="python")) + if isinstance(value, Mapping): + return _stable_json(value) + return value + + +def _stable_json(value: Any) -> str: + return json.dumps(value, sort_keys=True, separators=(",", ":"), default=str) + + +def _placement_value(host_context: Any | None, field_name: str) -> Any | None: + placement = getattr(host_context, "placement", None) + return getattr(placement, field_name, None) + + +def _first_present_attr(value: Any, names: tuple[str, ...]) -> Any | None: + for name in names: + attr = getattr(value, name, None) + if attr is not None: + return attr + return None + + +def _optional_text(value: Any) -> str | None: + if value is None: + return None + text = str(value).strip() + return text or None + + +__all__ = [ + "ModelRuntimeRequestFactsError", + "ResolvedModelRuntimeRequestFacts", + 
"resolve_model_runtime_request_facts", +] diff --git a/tensorcast/serving/source_catalog.py b/tensorcast/artifact_runtime/source.py similarity index 62% rename from tensorcast/serving/source_catalog.py rename to tensorcast/artifact_runtime/source.py index e0eaef84..79e4883c 100644 --- a/tensorcast/serving/source_catalog.py +++ b/tensorcast/artifact_runtime/source.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. -"""Source catalog primitives for serving local bootstrap and builders.""" +"""Artifact runtime source catalog primitives.""" from __future__ import annotations @@ -12,6 +12,7 @@ from dataclasses import dataclass from pathlib import Path from types import MappingProxyType +from typing import Any import torch @@ -21,7 +22,9 @@ canonical_index_to_bytes, ) from tensorcast.api.store.types import CanonicalIndex +from tensorcast.artifact_runtime.errors import SourceSubjectError from tensorcast.common.identity import ArtifactIdKind, validate_artifact_id +from tensorcast.types import PublicDiskSourceHandle _SOURCE_CATALOG_FINGERPRINT_VERSION = "tensorcast-source-catalog-v1" SOURCE_CATALOG_SCHEMA_VERSION = 1 @@ -69,6 +72,165 @@ def __post_init__(self) -> None: ) +@dataclass(frozen=True) +class SourceSubject: + """Framework-facing source subject with a durable source artifact root.""" + + artifact_ref: str + subject: Any + source_kind: str = "opaque" + metadata_fingerprint: str | None = None + + def broadcast_payload(self) -> dict[str, Any]: + if self.source_kind == "public_disk": + subject_payload = _public_disk_source_payload(self.subject) + else: + subject_payload = self.subject + return { + "kind": self.source_kind, + "artifact_ref": self.artifact_ref, + "subject": subject_payload, + "metadata_fingerprint": self.metadata_fingerprint, + } + + def profile_fields(self) -> dict[str, Any]: + source = self.subject + fields: dict[str, Any] = { + "artifact_ref": self.artifact_ref, + "source_kind": self.source_kind, + } + if self.metadata_fingerprint is not None: 
+ fields["metadata_fingerprint"] = self.metadata_fingerprint + canonical_index = getattr(source, "canonical_index_bytes", None) + if canonical_index is not None: + fields["canonical_index_bytes"] = len(canonical_index) + source_index = getattr(source, "source_index_bytes", None) + if source_index is not None: + fields["source_index_bytes"] = len(bytes(source_index or b"")) + for name in ("format_kind", "metadata_capability"): + value = getattr(source, name, None) + if value is not None: + fields[name] = str(value or "") + return fields + + +def _optional_str(value: Any) -> str | None: + if value is None: + return None + text = str(value) + return text or None + + +def _optional_text(value: Any) -> str | None: + return _optional_str(value) + + +def _optional_bytes(value: Any) -> bytes | None: + if value is None: + return None + data = bytes(value) + return data or None + + +def _enum_wire_value(value: Any) -> str | int | None: + if value is None: + return None + enum_value = getattr(value, "value", value) + if isinstance(enum_value, (str, int)): + return enum_value + return str(enum_value) + + +def _public_disk_source_payload(source: Any) -> dict[str, Any]: + return { + "path": str(getattr(source, "path", "") or ""), + "canonical_index_bytes": bytes(source.canonical_index_bytes), + "artifact_id": str(getattr(source, "artifact_id", "") or ""), + "generation": int(getattr(source, "generation", 0) or 0), + "verify_checksums": bool(getattr(source, "verify_checksums", True)), + "trusted_content_artifact_id": _optional_str( + getattr(source, "trusted_content_artifact_id", None) + ), + "source_index_bytes": _optional_bytes( + getattr(source, "source_index_bytes", None) + ), + "format_kind": _enum_wire_value(getattr(source, "format_kind", None)), + "metadata_capability": _enum_wire_value( + getattr(source, "metadata_capability", None) + ), + "resolution_strategy": _enum_wire_value( + getattr(source, "resolution_strategy", None) + ), + "validation_mode": 
_enum_wire_value(getattr(source, "validation_mode", None)), + "policy_id": _optional_str(getattr(source, "policy_id", None)), + "exact_size_bytes": int(getattr(source, "exact_size_bytes", 0) or 0), + } + + +def _source_subject_from_handle(source: Any) -> SourceSubject: + artifact_ref = str(getattr(source, "artifact_id", "") or "") + if not artifact_ref: + raise RuntimeError("TensorCast source subject is missing a source artifact_id") + return SourceSubject( + artifact_ref=artifact_ref, + subject=source, + source_kind="public_disk", + ) + + +def resolve_source_subject( + path: str, + *, + verify_checksums: bool, +) -> SourceSubject: + from tensorcast.api.store import resolve_public_disk_source + + return _source_subject_from_handle( + resolve_public_disk_source( + path, + verify_checksums=verify_checksums, + ) + ) + + +def source_subject_from_broadcast_payload(payload: Mapping[str, Any]) -> SourceSubject: + payload_dict = dict(payload) + if "kind" not in payload_dict: + raise SourceSubjectError( + "TensorCast source subject broadcast payload is missing kind" + ) + kind = str(payload_dict.get("kind") or "") + artifact_ref = str(payload_dict.get("artifact_ref") or "") + if not artifact_ref: + raise SourceSubjectError( + "TensorCast source subject broadcast payload is missing artifact_ref" + ) + source: Any + if kind == "public_disk": + subject_payload = payload_dict.get("subject") + if not isinstance(subject_payload, Mapping): + raise SourceSubjectError( + "TensorCast public_disk source subject payload must be a mapping" + ) + source = PublicDiskSourceHandle(**dict(subject_payload)) + else: + source = payload_dict.get("subject") + return SourceSubject( + artifact_ref=artifact_ref, + subject=source, + source_kind=kind, + metadata_fingerprint=_optional_text(payload_dict.get("metadata_fingerprint")), + ) + + +def source_subject_broadcast_payload(subject: SourceSubject) -> dict[str, Any]: + return subject.broadcast_payload() + + +def is_public_disk_source_subject(subject: 
Any) -> bool: + return isinstance(subject, PublicDiskSourceHandle) + + def source_catalog_from_selected_safetensors( directory: Path | str, *, @@ -275,15 +437,20 @@ def resolve_source_artifact_ref(source_artifact_ref: str) -> str: __all__ = [ + "SOURCE_CATALOG_SCHEMA_VERSION", "SourceCatalog", "SourceFileEntry", "SourceManifest", + "SourceSubject", "SourceTensorMeta", - "SOURCE_CATALOG_SCHEMA_VERSION", "compute_source_metadata_fingerprint", + "is_public_disk_source_subject", "resolve_source_artifact_ref", + "resolve_source_subject", "source_catalog_from_all_safetensors_dir", "source_catalog_from_canonical_index", "source_catalog_from_manifest", "source_catalog_from_selected_safetensors", + "source_subject_broadcast_payload", + "source_subject_from_broadcast_payload", ] diff --git a/tensorcast/serving/state.py b/tensorcast/artifact_runtime/state.py similarity index 99% rename from tensorcast/serving/state.py rename to tensorcast/artifact_runtime/state.py index b6a3d1ff..aca5bad6 100644 --- a/tensorcast/serving/state.py +++ b/tensorcast/artifact_runtime/state.py @@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from tensorcast.serving.runtime_attachment import ( + from tensorcast.artifact_runtime.attachment import ( RuntimeAttachment, RuntimeBindingState, RuntimeBindingView, diff --git a/tensorcast/artifact_runtime/testing.py b/tensorcast/artifact_runtime/testing.py new file mode 100644 index 00000000..d8a5c56a --- /dev/null +++ b/tensorcast/artifact_runtime/testing.py @@ -0,0 +1,910 @@ +# Copyright (c) 2026, TensorCast Team. 
+"""Reusable conformance checks for framework artifact-runtime integrations.""" + +from __future__ import annotations + +import weakref +from collections.abc import Iterable, Mapping +from contextlib import contextmanager +from dataclasses import dataclass, field +from types import ModuleType, SimpleNamespace +from typing import Any, cast + +import torch + +import tensorcast as tc +import tensorcast.artifact_runtime.lifecycle as _integration + + +@dataclass(frozen=True) +class ConformanceResult: + """Result from a lightweight artifact runtime conformance check.""" + + checks: Mapping[str, bool] = field(default_factory=dict) + messages: Mapping[str, str] = field(default_factory=dict) + level: str | None = None + + @property + def failed_checks(self) -> tuple[str, ...]: + return tuple(name for name, passed in self.checks.items() if not passed) + + def failure_summary(self) -> str: + failed = self.failed_checks + if not failed: + return "TensorCast artifact-runtime conformance checks passed" + lines = [ + "TensorCast artifact-runtime conformance checks failed" + + (f" for {self.level}" if self.level else "") + + ":" + ] + for name in failed: + message = self.messages.get(name, "No remediation hint available") + lines.append(f"- {name}: {message}") + return "\n".join(lines) + + def assert_passed(self) -> None: + if self.failed_checks: + raise AssertionError(self.failure_summary()) + + +def _result( + *, + level: str, + checks: Mapping[str, bool], + messages: Mapping[str, str], +) -> ConformanceResult: + result = ConformanceResult(checks=checks, messages=messages, level=level) + result.assert_passed() + return result + + +_PUBLIC_BOUNDARY_MESSAGES = { + "hides_runtime_session": ( + "Do not expose ArtifactRuntimeSession from the public runtime API; " + "frameworks should use Artifact.realize(... model_runtime ...) and " + "artifact-runtime actions." + ), + "has_attachment": ( + "Expose RuntimeAttachment as the framework-held lifecycle token." 
+ ), + "has_request_context": ( + "Expose RequestContext so framework facts enter lifecycle calls through " + "one typed context object." + ), + "hides_admin_local_bootstrap": ( + "Keep admin/local-bootstrap override DTOs out of the framework runtime " + "module; route them through admin/offline surfaces." + ), + "hides_low_level_bind": ( + "Do not expose bind/swap/restore helpers from the runtime module; " + "frameworks should use artifact-runtime start/reload/publication actions." + ), + "hides_serving_locator_policy": ( + "Keep serving-rooted locator and policy aliases out of " + "the public runtime API; use ArtifactLocator, RuntimePolicy, and " + "runtime reload helpers." + ), + "hides_legacy_config": ( + "Keep serving-rooted config and start-plan names out of " + "the public runtime API; use TensorCastRuntimeConfig and " + "plan_runtime_start." + ), + "hides_projection_dtos": ( + "Runtime endpoint projection DTOs live in tensorcast.artifact_runtime.view." + ), + "hides_state_helpers": ( + "Model attribute helpers live in tensorcast.artifact_runtime.state." + ), +} + +_ARTIFACT_RUNTIME_BOUNDARY_MESSAGES = { + "has_artifact_realization_spec": ( + "Expose ArtifactRealizationSpec so frameworks can request model_runtime " + "realization through the artifact API." + ), + "has_runtime_host": ( + "Expose RuntimeHostCapabilities as the framework-provided host surface." + ), + "has_runtime_context": ( + "Expose RuntimeRequestContext so framework facts enter runtime actions " + "through one typed context object." + ), + "has_artifact_locator": ( + "Expose ArtifactLocator for durable artifact runtime reload requests." + ), + "has_runtime_policy": ("Expose RuntimePolicy for typed runtime reload admission."), + "has_reload_action": ( + "Expose reload_runtime_attachment for runtime reload without a serving " + "session object." + ), + "has_publication_actions": ( + "Expose runtime replica publish/retire actions without requiring a " + "runtime session object." 
+ ), + "hides_runtime_session": ( + "The tensorcast root runtime path must not expose ArtifactRuntimeSession; " + "frameworks should use Artifact.realize(... model_runtime ...) instead." + ), + "hides_legacy_serving_dtos": ( + "Keep legacy serving-rooted DTO aliases off the tensorcast root runtime " + "surface." + ), +} + +_FRAMEWORK_ISOLATION_MESSAGES = { + "no_vllm_imports": ( + "Reference and conformance frameworks must not import vLLM. Move any " + "needed generic fact extraction into TensorCast hosts or testing helpers." + ), + "no_internal_runtime_imports": ( + "Framework examples should not import TensorCast private/internal " + "runtime modules." + ), + "no_serving_imports": ( + "Framework examples should not import the removed tensorcast.serving " + "package; use tensorcast.artifact_runtime host/testing surfaces instead." + ), +} + + +def assert_public_artifact_runtime_boundary( + tc_module: ModuleType = tc, +) -> ConformanceResult: + """Check that the root API exposes artifact-runtime, not serving-session, APIs.""" + + public_names = set(getattr(tc_module, "__all__", ())) + checks = { + "has_artifact_realization_spec": "ArtifactRealizationSpec" in public_names, + "has_runtime_host": "RuntimeHostCapabilities" in public_names, + "has_runtime_context": "RuntimeRequestContext" in public_names, + "has_artifact_locator": "ArtifactLocator" in public_names, + "has_runtime_policy": "RuntimePolicy" in public_names, + "has_reload_action": "reload_runtime_attachment" in public_names, + "has_publication_actions": { + "publish_runtime_replica", + "retire_runtime_replica", + }.issubset(public_names), + "hides_runtime_session": "ArtifactRuntimeSession" not in public_names, + "hides_legacy_serving_dtos": { + "ServingBuildIntent", + "ServingArtifactManifest", + "ServingRuntimePolicy", + "ServingBindingTarget", + "ServingBindingSetTarget", + "PrefetchedServingBinding", + "PrefetchedServingBindingSet", + }.isdisjoint(public_names), + } + return _result( + 
level="public-artifact-runtime-boundary", + checks=checks, + messages=_ARTIFACT_RUNTIME_BOUNDARY_MESSAGES, + ) + + +def assert_public_runtime_boundary(runtime_module: ModuleType) -> ConformanceResult: + """Check that runtime imports expose framework APIs, not admin helpers.""" + + public_names = set(getattr(runtime_module, "__all__", ())) + checks = { + "hides_runtime_session": "ArtifactRuntimeSession" not in public_names, + "has_attachment": "RuntimeAttachment" in public_names, + "has_request_context": "RequestContext" in public_names, + "hides_admin_local_bootstrap": "AdminLocalSourceBootstrap" not in public_names + and "_AdminLocalSourceBootstrap" not in public_names, + "hides_low_level_bind": "bind_runtime_artifact" not in public_names + and "swap_runtime_artifact" not in public_names + and "restore_retained_binding" not in public_names, + "hides_serving_locator_policy": { + "ServingArtifactLocator", + "ServingPolicy", + "merge_serving_reload_extra_config", + "normalize_serving_reload_request_payload", + }.isdisjoint(public_names), + "hides_legacy_config": { + "ServingConfig", + "ServingStartPlan", + "ServingStartPlanError", + "plan_serving_start", + }.isdisjoint(public_names) + and "TensorCastRuntimeConfig" in public_names + and "plan_runtime_start" in public_names, + "hides_projection_dtos": { + "PublishedReplicaProjection", + "ReloadResponseProjection", + "RuntimeEndpointProjection", + "SourceSelectionProjection", + "WeightVersionProjection", + }.isdisjoint(public_names), + "hides_state_helpers": { + "ModelAttributeRuntimeState", + "RuntimeAttachmentRecord", + "RuntimeAttachmentStore", + }.isdisjoint(public_names), + } + return _result( + level="public-runtime-boundary", + checks=checks, + messages=_PUBLIC_BOUNDARY_MESSAGES, + ) + + +def assert_framework_isolation(module_names: Iterable[str]) -> ConformanceResult: + """Check that a fake/reference framework avoids vLLM imports.""" + + names = tuple(str(name) for name in module_names) + checks = { + 
"no_vllm_imports": not any( + name == "vllm" or name.startswith("vllm.") for name in names + ), + "no_internal_runtime_imports": not any( + name.startswith("tensorcast.serving.internal") for name in names + ), + "no_serving_imports": not any( + name == "tensorcast.serving" or name.startswith("tensorcast.serving.") + for name in names + ), + } + return _result( + level="framework-isolation", + checks=checks, + messages=_FRAMEWORK_ISOLATION_MESSAGES, + ) + + +class FakeArtifactView: + def __init__(self, names: Iterable[str] = ()) -> None: + self.names = tuple(names) + + def bind(self, **kwargs: Any) -> "FakeBinding": + binding = FakeBinding() + binding.names = self.names + binding.bind_kwargs = kwargs + return binding + + +class FakeArtifact: + def subset(self, names: Iterable[str]) -> FakeArtifactView: + return FakeArtifactView(names) + + +class FakeBinding: + def __init__(self) -> None: + self.tensors = {"w": torch.ones((1,), dtype=torch.float16)} + self.binding_layout_id = "layout-1" + self.current_value = SimpleNamespace( + binding_id="binding-1", + binding_layout_id="layout-1", + binding_value_id="value-1", + seal_generation=1, + ) + self.names: tuple[str, ...] 
= () + self.bind_kwargs: dict[str, Any] = {} + self.swapped: tuple[object, dict[str, Any]] | None = None + self.published_lease_id: str | None = None + self.published_replica_id: str | None = None + self.publish_calls = 0 + self.retire_calls: list[float | None] = [] + self.closed = False + + def swap(self, artifact: object, **kwargs: Any) -> "FakeBinding": + self.swapped = (artifact, kwargs) + self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)} + return self + + def publish_replica(self) -> object: + self.publish_calls += 1 + self.published_lease_id = "lease-1" + self.published_replica_id = "replica-1" + return SimpleNamespace( + binding_id=self.current_value.binding_id, + binding_layout_id=self.current_value.binding_layout_id, + binding_value_id=self.current_value.binding_value_id, + seal_generation=self.current_value.seal_generation, + replica_id=self.published_replica_id, + lease_id=self.published_lease_id, + serving_artifact_id="mi2:serving", + device_uuid="gpu-0", + ) + + def retire(self, *, drain_timeout_s: float | None = None) -> None: + self.retire_calls.append(drain_timeout_s) + self.published_lease_id = None + self.published_replica_id = None + + def close(self) -> None: + self.closed = True + + +class FakeRuntimeModel: + def __init__(self) -> None: + self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")} + + +class FakeFrameworkHost: + def identity(self, model_config: object) -> _integration.FrameworkIdentity: + del model_config + return _integration.FrameworkIdentity( + framework_name="fakefw", + framework_version="fakefw-v1", + adapter_version="adapter-v1", + serving_abi_version="abi-v1", + ) + + def prepare_model_construction( + self, + framework_config: object | None, + model_config: object | None, + ) -> None: + del framework_config, model_config + + def build_meta_model( + self, + framework_config: object | None, + model_config: object | None, + ) -> FakeRuntimeModel: + del framework_config, model_config + return 
FakeRuntimeModel() + + def build_runtime_model( + self, + framework_config: object | None, + model_config: object | None, + target_device: object | None, + ) -> FakeRuntimeModel: + del framework_config, model_config, target_device + return FakeRuntimeModel() + + def assert_model_ready_for_runtime_binding( + self, + model: FakeRuntimeModel, + *, + context: object, + ) -> None: + del context + if "w" not in model.tensors: + raise AssertionError("fake model missing runtime tensor 'w'") + + def semantic_probes( + self, + model: FakeRuntimeModel, + model_config: object | None, + ) -> dict[str, object]: + del model, model_config + return {} + + +class FakePlacementHost: + def identity_facts( + self, + framework_config: object | None, + ) -> _integration.PlacementIdentityFacts: + del framework_config + return _integration.PlacementIdentityFacts( + tensor_parallel_rank=0, + tensor_parallel_size=1, + pipeline_parallel_rank=0, + pipeline_parallel_size=1, + data_parallel_rank=0, + data_parallel_size=1, + ) + + def admission_facts( + self, + framework_config: object | None, + ) -> _integration.PlacementAdmissionFacts: + del framework_config + return _integration.PlacementAdmissionFacts() + + def member_facts( + self, + framework_config: object | None, + ) -> _integration.PlacementMemberFacts: + del framework_config + return _integration.PlacementMemberFacts( + runtime_rank=0, + runtime_world_size=1, + member_id="member-0", + member_index=0, + member_count=1, + group_id_hint="group-1", + ) + + def execution_facts( + self, + framework_config: object | None, + ) -> _integration.MaterializationExecutionFacts: + del framework_config + return _integration.MaterializationExecutionFacts( + collective_rank=0, + collective_world_size=1, + tensor_parallel_ranks=(0,), + ) + + +class FakeTensorSurface: + def runtime_only_tensor_names(self, model: FakeRuntimeModel) -> tuple[str, ...]: + del model + return () + + def align_runtime_tensor_names( + self, + model: FakeRuntimeModel, + 
expected_names: Iterable[str], + ) -> int: + if set(expected_names) != set(model.tensors): + raise AssertionError("fake runtime tensor names do not match") + return 0 + + def collect_runtime_tensors( + self, + model: FakeRuntimeModel, + *, + remove_duplicate: bool = False, + ) -> dict[str, object]: + del remove_duplicate + return dict(model.tensors) + + def collect_runtime_tensor_view( + self, + tensors: Mapping[str, object], + ) -> tuple[object, ...]: + del tensors + return () + + def compute_runtime_tensor_schema_hash( + self, + tensors: Mapping[str, object], + *, + remove_duplicate: bool = False, + ) -> str: + del tensors, remove_duplicate + return "fake-schema" + + def attach_bound_tensors( + self, + model: FakeRuntimeModel, + tensors: Mapping[str, object], + *, + replace_meta_params: bool, + ) -> FakeRuntimeModel: + del replace_meta_params + model.tensors.update(cast(Mapping[str, torch.Tensor], tensors)) + return model + + def allocate_runtime_only_tensors( + self, + model: FakeRuntimeModel, + target_device: torch.device, + ) -> dict[str, object]: + del model, target_device + return {} + + def snapshot_tensor_invariants( + self, + tensors: Mapping[str, object], + ) -> tuple[str, ...]: + return tuple(sorted(tensors)) + + def validate_tensor_invariants( + self, + before: tuple[str, ...], + after: Mapping[str, object], + ) -> None: + if before != tuple(sorted(after)): + raise AssertionError("fake tensor invariants changed") + + +class FakeRuntimeOnlyTensorSurface(FakeTensorSurface): + def __init__(self) -> None: + self.allocated: list[tuple[str, torch.device]] = [] + + def runtime_only_tensor_names(self, model: FakeRuntimeModel) -> tuple[str, ...]: + del model + return ("cache",) + + def collect_runtime_tensors( + self, + model: FakeRuntimeModel, + *, + remove_duplicate: bool = False, + ) -> dict[str, object]: + del remove_duplicate + return { + name: tensor for name, tensor in model.tensors.items() if name != "cache" + } + + def allocate_runtime_only_tensors( + 
self, + model: FakeRuntimeModel, + target_device: torch.device, + ) -> dict[str, object]: + self.allocated.append(("cache", target_device)) + tensor = torch.zeros((1,), dtype=torch.float16) + model.tensors["cache"] = tensor + return {"cache": tensor} + + +class FakeRuntimeArtifactResolver: + def resolve(self, artifact_ref: str) -> SimpleNamespace: + return SimpleNamespace( + artifact=FakeArtifact(), + artifact_ref=artifact_ref, + tensor_names=("w",), + manifest=SimpleNamespace( + representation_contract_hash=f"repr:{artifact_ref}", + source_artifact_ref="mi2:source", + serving_build_digest=f"build:{artifact_ref}", + ), + ) + + def cross_check( + self, + resolved_artifact: SimpleNamespace, + **kwargs: object, + ) -> SimpleNamespace: + del kwargs + return resolved_artifact + + +class RecordingRuntimeArtifactResolver(FakeRuntimeArtifactResolver): + def __init__(self) -> None: + self.calls: list[tuple[str, object]] = [] + + def resolve(self, artifact_ref: str) -> SimpleNamespace: + self.calls.append(("resolve", artifact_ref)) + return super().resolve(artifact_ref) + + def cross_check( + self, + resolved_artifact: SimpleNamespace, + **kwargs: object, + ) -> SimpleNamespace: + self.calls.append(("cross_check", dict(kwargs))) + return super().cross_check(resolved_artifact, **kwargs) + + +class _LocalPathLocator: + kind = "local_path" + value = "/tmp/fakefw-model" + + +def build_fake_artifact_runtime_host( + tc_module: ModuleType = tc, + *, + tensor_surface: object | None = None, +) -> object: + """Build a minimal non-vLLM host through the root artifact-runtime API.""" + + return tc_module.RuntimeHostCapabilities( + framework=FakeFrameworkHost(), + placement=FakePlacementHost(), + tensor_surface=tensor_surface or FakeTensorSurface(), + ) + + +_ARTIFACT_LEVEL1_MESSAGES = { + "direct_start": ( + "Artifact model_runtime startup failed. Verify framework model " + "construction, tensor surface attach/schema behavior, placement facts, " + "and artifact resolver output." 
+ ), + "artifact_realization_report": ( + "Artifact.realize(... model_runtime ...) must return a model_runtime " + "realization report for the requested framework." + ), + "runtime_session_not_required": ( + "Level 1 artifact-runtime start/reload must not instantiate or call " + "ArtifactRuntimeSession." + ), + "target_layout_from_runtime_binding": ( + "Model-runtime reports must carry target layout identity from the " + "runtime attachment binding." + ), + "runtime_only_tensors_allocated": ( + "Runtime-only tensor allocation must be expressible through the neutral " + "RuntimeHostCapabilities tensor surface." + ), + "runtime_publication_actions": ( + "Runtime publication must be represented by artifact-runtime " + "publish/retire actions, not by a runtime session." + ), + "describe": ( + "RuntimeAttachment.view must expose the typed RuntimeWorkerView for the " + "current attachment." + ), + "reload": ( + "Artifact runtime reload failed. Level 1 reload must use a typed " + "ArtifactLocator and RuntimePolicy." + ), + "reload_identity_from_runtime_view": ( + "Reload response identity must come from the runtime view, not from the " + "request payload." + ), + "source_capability_not_required": ( + "Level 1 direct artifact runtime start/reload must not require SourceHost." + ), + "source_catalog_not_required": ( + "Level 1 direct artifact runtime start/reload must not require " + "SourceCatalogProvider." + ), + "resolver_uses_artifact_refs": ( + "Artifact runtime start/reload must resolve durable artifact refs through " + "the supplied runtime resolver." + ), + "rejects_local_reload_artifact_locator": ( + "Reload must reject local source selectors; local paths belong to " + "source bootstrap, not durable artifact runtime reload." + ), + "rejects_untyped_reload_artifact_locator": ( + "Reload must reject untyped artifact locator dictionaries on the public " + "runtime path. Use ArtifactLocator." 
+ ), + "rejects_untyped_reload_policy": ( + "Reload must reject untyped policy dictionaries on the public runtime " + "path. Use RuntimePolicy." + ), +} + + +@contextmanager +def _patched_direct_artifact_runtime(): + integration_module = cast(Any, _integration) + original_contract_reader = integration_module.read_source_bound_contract_state + original_materialization_options = ( + integration_module.ArtifactRuntimeIntegration.build_materialization_options + ) + integration_module.read_source_bound_contract_state = lambda: SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ) + integration_module.ArtifactRuntimeIntegration.build_materialization_options = ( + lambda self, **kwargs: ("fake-materialization-options", kwargs) + ) + try: + yield + finally: + integration_module.read_source_bound_contract_state = original_contract_reader + integration_module.ArtifactRuntimeIntegration.build_materialization_options = ( + original_materialization_options + ) + + +@contextmanager +def _reject_artifact_runtime_session(): + session_cls = cast(Any, _integration.ArtifactRuntimeSession) + original_from_config = session_cls.__dict__["from_config"] + original_start = session_cls.__dict__["start"] + original_reload = session_cls.__dict__["reload"] + + def reject_runtime_session(*_args: object, **_kwargs: object) -> None: + raise AssertionError("artifact-runtime conformance used ArtifactRuntimeSession") + + session_cls.from_config = classmethod(reject_runtime_session) + session_cls.start = reject_runtime_session + session_cls.reload = reject_runtime_session + try: + yield + finally: + session_cls.from_config = original_from_config + session_cls.start = original_start + session_cls.reload = original_reload + + +def assert_level1_artifact_runtime_conformance( + tc_module: ModuleType = tc, + *, + host: object | None = None, +) -> ConformanceResult: + """Run Level 1 durable model-runtime conformance 
through Artifact.realize.""" + + from tensorcast.api.store.artifact import Artifact + + checks: dict[str, bool] = {} + assert_public_artifact_runtime_boundary(tc_module) + assert_framework_isolation((tc_module.__name__, __name__)) + + class _Store: + pass + + with _patched_direct_artifact_runtime(), _reject_artifact_runtime_session(): + store = _Store() + tensor_surface = None if host is not None else FakeRuntimeOnlyTensorSurface() + runtime_host = host or build_fake_artifact_runtime_host( + tc_module, + tensor_surface=tensor_surface, + ) + model_config = SimpleNamespace(model="fake-model") + identity = runtime_host.framework.identity(model_config) + resolver = RecordingRuntimeArtifactResolver() + store_ref: weakref.ReferenceType[Any] = weakref.ref(store) + artifact = Artifact( + store_ref=store_ref, + artifact_id="mi2:serving", + ) + handle = artifact.realize( + tc_module.ArtifactRealizationSpec.model_runtime( + framework=str(identity.framework_name), + device=torch.device("cuda:0"), + adapter_version=str(identity.adapter_version), + runtime_abi_version=str(identity.serving_abi_version), + ), + runtime_host=runtime_host, + runtime_context=tc_module.RuntimeRequestContext( + framework_config=SimpleNamespace(), + model_config=model_config, + ), + runtime_resolver=resolver, + ) + attachment = handle.attachment() + model_runtime_report = handle.report.model_runtime + target_plan = handle.report.target_plan + direct_payload = attachment.view.endpoint.to_weight_version_payload() + checks["direct_start"] = ( + direct_payload.get("serving_artifact_ref") == "mi2:serving" + and direct_payload.get("source_artifact_ref") == "mi2:source" + ) + checks["artifact_realization_report"] = ( + handle.report.target_kind == "model_runtime" + and model_runtime_report is not None + and model_runtime_report.framework == str(identity.framework_name) + ) + checks["target_layout_from_runtime_binding"] = ( + handle.report.target_layout_digest == "binding-layout:layout-1" + and target_plan is 
not None + and target_plan.target_layout_digest == "binding-layout:layout-1" + ) + checks["runtime_only_tensors_allocated"] = ( + True + if tensor_surface is None + else ( + "cache" in attachment.model.tensors + and ("cache", torch.device("cuda:0")) in tensor_surface.allocated + ) + ) + publication_events: list[Mapping[str, object]] = [] + published = tc_module.publish_runtime_replica( + current_attachment=attachment, + policy=SimpleNamespace( + mode="required", + timeout_s=0.0, + drain_timeout_s=0.0, + ), + ensure_runtime_initialized=lambda: None, + profile_sink=publication_events.append, + ) + published_replica = published.view.endpoint.weight_version.published_replica + retired = tc_module.retire_runtime_replica( + current_attachment=published, + reason="conformance", + drain_timeout_s=0.0, + ensure_runtime_initialized=lambda: None, + profile_sink=publication_events.append, + ) + retired_replica = retired.view.endpoint.weight_version.published_replica + published_binding = published.state.binding + checks["runtime_publication_actions"] = ( + published_replica is not None + and published_replica.state == "published" + and published_replica.replica_id == "replica-1" + and retired_replica is not None + and retired_replica.state == "retired" + and getattr(published_binding, "publish_calls", 0) == 1 + and getattr(published_binding, "retire_calls", ()) == [0.0] + and [event["event"] for event in publication_events] + == [ + "runtime_publication.publish.done", + "runtime_publication.retire.done", + ] + ) + checks["describe"] = ( + attachment.view.endpoint.to_weight_version_payload().get( + "serving_artifact_ref" + ) + == "mi2:serving" + ) + + reloaded = tc_module.reload_runtime_attachment( + current_attachment=retired, + artifact_locator=tc_module.ArtifactLocator.artifact_ref("mi2:serving-next"), + policy=tc_module.RuntimePolicy(), + runtime_host=runtime_host, + runtime_context=tc_module.RuntimeRequestContext( + framework_config=SimpleNamespace(), + 
model_config=SimpleNamespace(model="fake-model"), + ), + ensure_runtime_initialized=lambda: None, + model=attachment.model, + runtime_resolver=resolver, + ) + reload_response = reloaded.view.endpoint.to_reload_response_payload() + checks["reload"] = ( + reload_response is not None + and reload_response.get("serving_artifact_ref") == "mi2:serving-next" + ) + checks["reload_identity_from_runtime_view"] = ( + reload_response is not None + and reloaded.state.runtime_view.serving_artifact_ref + == reload_response.get("serving_artifact_ref") + ) + checks["source_capability_not_required"] = True + checks["source_catalog_not_required"] = True + checks["resolver_uses_artifact_refs"] = ( + "resolve", + "mi2:serving", + ) in resolver.calls and ("resolve", "mi2:serving-next") in resolver.calls + + try: + tc_module.reload_runtime_attachment( + current_attachment=reloaded, + artifact_locator=_LocalPathLocator(), + policy=tc_module.RuntimePolicy(), + runtime_host=runtime_host, + runtime_context=tc_module.RuntimeRequestContext(), + ensure_runtime_initialized=lambda: None, + ) + except _integration.ConfigConflictError: + checks["rejects_local_reload_artifact_locator"] = True + else: + checks["rejects_local_reload_artifact_locator"] = False + + try: + tc_module.reload_runtime_attachment( + current_attachment=reloaded, + artifact_locator={ + "kind": "artifact_ref", + "value": "mi2:serving-next", + }, + policy=tc_module.RuntimePolicy(), + runtime_host=runtime_host, + runtime_context=tc_module.RuntimeRequestContext(), + ensure_runtime_initialized=lambda: None, + ) + except _integration.ConfigConflictError: + checks["rejects_untyped_reload_artifact_locator"] = True + else: + checks["rejects_untyped_reload_artifact_locator"] = False + + try: + tc_module.reload_runtime_attachment( + current_attachment=reloaded, + artifact_locator=tc_module.ArtifactLocator.artifact_ref( + "mi2:serving-next" + ), + policy={"mode": "from_manifest"}, + runtime_host=runtime_host, + 
runtime_context=tc_module.RuntimeRequestContext(), + ensure_runtime_initialized=lambda: None, + ) + except _integration.ConfigConflictError: + checks["rejects_untyped_reload_policy"] = True + else: + checks["rejects_untyped_reload_policy"] = False + checks["runtime_session_not_required"] = True + + return _result( + level="level1-artifact-runtime", + checks=checks, + messages=_ARTIFACT_LEVEL1_MESSAGES, + ) + + +__all__ = [ + "ConformanceResult", + "FakeArtifact", + "FakeArtifactView", + "FakeBinding", + "FakeFrameworkHost", + "FakePlacementHost", + "FakeRuntimeOnlyTensorSurface", + "FakeRuntimeModel", + "FakeRuntimeArtifactResolver", + "FakeTensorSurface", + "RecordingRuntimeArtifactResolver", + "assert_framework_isolation", + "assert_level1_artifact_runtime_conformance", + "assert_public_artifact_runtime_boundary", + "assert_public_runtime_boundary", + "build_fake_artifact_runtime_host", +] diff --git a/tensorcast/serving/runtime_view.py b/tensorcast/artifact_runtime/view.py similarity index 94% rename from tensorcast/serving/runtime_view.py rename to tensorcast/artifact_runtime/view.py index 5ae3665b..8e83ae69 100644 --- a/tensorcast/serving/runtime_view.py +++ b/tensorcast/artifact_runtime/view.py @@ -56,10 +56,12 @@ def _diagnostic_value( return getattr(diagnostics, name, default) -def _serving_realization_report( +def _runtime_realization_report( diagnostics: Mapping[str, object], ) -> Mapping[str, object] | None: - value = diagnostics.get("serving_realization_report") + value = diagnostics.get("runtime_realization_report") + if not isinstance(value, Mapping): + value = diagnostics.get("serving_realization_report") if isinstance(value, Mapping): return value return None @@ -246,10 +248,39 @@ def to_dict(self) -> dict[str, object]: return payload +def _source_selection_projection_from_value( + value: object | None, +) -> SourceSelectionProjection | None: + if value is None: + return None + if isinstance(value, SourceSelectionProjection): + return value + if not 
isinstance(value, Mapping): + return None + selected_source_kind = _optional_text(value.get("selected_source_kind")) + if selected_source_kind is None: + return None + return SourceSelectionProjection( + selected_source_kind=selected_source_kind, + selected_replica_id=_optional_text(value.get("selected_replica_id")), + selected_producer_worker_id=_optional_text( + value.get("selected_producer_worker_id") + ), + selected_byte_space_kind=_optional_text(value.get("selected_byte_space_kind")), + selected_byte_space_id=_optional_text(value.get("selected_byte_space_id")), + p2p_bytes=_optional_int(value.get("p2p_bytes")) or 0, + fallback_bytes=_optional_int(value.get("fallback_bytes")) or 0, + disk_bytes=_optional_int(value.get("disk_bytes")) or 0, + reselection_attempts=_optional_int(value.get("reselection_attempts")) or 0, + reject_reason_bucket=_optional_text(value.get("reject_reason_bucket")), + fallback_reason_bucket=_optional_text(value.get("fallback_reason_bucket")), + ) + + def _source_bound_projection_from_diagnostics( diagnostics: Mapping[str, object], ) -> SourceBoundContractProjection | None: - report = _serving_realization_report(diagnostics) + report = _runtime_realization_report(diagnostics) source_contract = _nested_mapping(report, "source_bound_contract") if source_contract is not None: return SourceBoundContractProjection(dict(source_contract)) @@ -275,7 +306,7 @@ def _materialization_projection_from_fields( ) -> MaterializationDiagnosticsProjection | None: fields: dict[str, object] = {} if prefix == "realize": - report = _serving_realization_report(diagnostics) + report = _runtime_realization_report(diagnostics) realization = _nested_mapping(report, "realization") execution = _nested_mapping(realization, "execution") plan = _nested_mapping(realization, "plan") @@ -553,6 +584,11 @@ def source_selection_projection_from_runtime_diagnostics( if diagnostics is None: return None + explicit = _source_selection_projection_from_value( + 
diagnostics.get("source_selection") + ) + if explicit is not None: + return explicit materialization = source_selection_projection_from_materialization_diagnostics( diagnostics.get("materialization") ) @@ -563,7 +599,7 @@ def source_selection_projection_from_runtime_diagnostics( ) if execution is not None: return execution - report = _serving_realization_report(diagnostics) + report = _runtime_realization_report(diagnostics) realization = _nested_mapping(report, "realization") report_execution = _nested_mapping(realization, "execution") serving_projection = source_selection_projection_from_execution_diagnostics( @@ -699,7 +735,7 @@ def from_runtime_view( include_reload_response: bool = False, ) -> "RuntimeWorkerView": diagnostics = dict(getattr(view, "diagnostics", None) or {}) - report = _serving_realization_report(diagnostics) + report = _runtime_realization_report(diagnostics) binding_value_ref = BindingValueRefProjection.from_value( getattr(view, "binding_value_ref", None) ) diff --git a/tensorcast/daemon_ctl.py b/tensorcast/daemon_ctl.py index ab8e4352..c053d64f 100644 --- a/tensorcast/daemon_ctl.py +++ b/tensorcast/daemon_ctl.py @@ -75,17 +75,17 @@ LocalStableTierResult, Plan, PrefetchRetentionPolicy, + RealizationTarget, + RealizationTargetSet, RegionMemoryKind, RegisterStorage, RegisterTensorAlias, RepresentationPublishSpec, + RuntimeArtifactPolicy, + RuntimeBindingMemberRef, + RuntimeBindingReadiness, SealAssemblyResult, ServerConfig, - ServingBindingMemberRef, - ServingBindingReadiness, - ServingBindingSetTarget, - ServingBindingTarget, - ServingRuntimePolicy, StableDramHandshake, VramRegionHandle, ) @@ -943,7 +943,7 @@ def materialize_into_target( target_layout: store_daemon_pb2.TargetLayout, device_uuid: str, source_policy: store_daemon_pb2.SourcePolicy | None = None, - serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, placement: store_daemon_pb2.TransformPlacement | None = 
None, pid: int | None = None, operation_id: str | None = None, @@ -967,9 +967,9 @@ def materialize_into_target( pid=pid_value, ) request.source_policy.CopyFrom(resolved_source_policy) - if serving_runtime_policy is not None: + if runtime_artifact_policy is not None: request.serving_artifact_policy.CopyFrom( - serving_runtime_policy.to_proto() + runtime_artifact_policy.to_proto() ) if placement is not None: request.placement = placement @@ -1017,7 +1017,7 @@ def materialize_into_mapped_target( copy_plan, dst_tensors: Mapping[str, torch.Tensor], source_policy: store_daemon_pb2.SourcePolicy | None = None, - serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, placement: store_daemon_pb2.TransformPlacement | None = None, pid: int | None = None, operation_id: str | None = None, @@ -1072,9 +1072,9 @@ def materialize_into_mapped_target( spec.stride.extend(int(v) for v in tensor.stride()) request.dst_tensors.append(spec) request.source_policy.CopyFrom(resolved_source_policy) - if serving_runtime_policy is not None: + if runtime_artifact_policy is not None: request.serving_artifact_policy.CopyFrom( - serving_runtime_policy.to_proto() + runtime_artifact_policy.to_proto() ) if placement is not None: request.placement = placement @@ -1128,7 +1128,7 @@ def create_owned_binding( device_uuid: str, binding_layout_id: str, source_policy: store_daemon_pb2.SourcePolicy | None = None, - serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, placement: store_daemon_pb2.TransformPlacement | None = None, copy_plan: store_daemon_pb2.CopyPlan | None = None, dst_specs: Iterable[store_daemon_pb2.MappedTensorSpec] | None = None, @@ -1159,9 +1159,9 @@ def create_owned_binding( pid=pid_value, ) request.source_policy.CopyFrom(resolved_source_policy) - if serving_runtime_policy is not None: + if runtime_artifact_policy is not None: 
request.serving_artifact_policy.CopyFrom( - serving_runtime_policy.to_proto() + runtime_artifact_policy.to_proto() ) if placement is not None: request.placement = placement @@ -1203,8 +1203,8 @@ def prefetch_serving_binding( self, *, source_selection: common_pb2.ArtifactSelection, - target: ServingBindingTarget | ServingBindingSetTarget, - requested_readiness: ServingBindingReadiness, + target: RealizationTarget | RealizationTargetSet, + requested_readiness: RuntimeBindingReadiness, retention_policy: PrefetchRetentionPolicy | None = None, operation_id: str | None = None, group_realization: Any | None = None, @@ -1220,15 +1220,15 @@ def prefetch_serving_binding( requested_readiness=_SERVING_READINESS_TO_PROTO[requested_readiness], ) request.source_selection.CopyFrom(source_selection) - if isinstance(target, ServingBindingTarget): + if isinstance(target, RealizationTarget): request.source.CopyFrom(target.source.to_proto()) request.serving_binding_target.CopyFrom(target.to_proto()) - elif isinstance(target, ServingBindingSetTarget): + elif isinstance(target, RealizationTargetSet): request.source.CopyFrom(target.source.to_proto()) request.serving_binding_set_target.CopyFrom(target.to_proto()) else: raise ValueError( - "target must be a ServingBindingTarget or ServingBindingSetTarget" + "target must be a RealizationTarget or RealizationTargetSet" ) if retention_policy is not None: request.retention_policy.CopyFrom(retention_policy.to_proto()) @@ -1322,7 +1322,7 @@ def acquire_binding_value( expected_serving_build_digest: str, expected_daemon_id: str | None = None, expected_daemon_session_id: str | None = None, - expected_member: ServingBindingMemberRef | None = None, + expected_member: RuntimeBindingMemberRef | None = None, local_serving_ref: str | None = None, group_realization_acquire: GroupRealizationAcquireRef | None = None, caller_pid: int | None = None, @@ -1391,7 +1391,7 @@ def acquire_binding_value_by_local_ref( expected_device_uuid: str, 
expected_tensor_schema_hash: str, expected_serving_build_digest: str, - expected_member: ServingBindingMemberRef, + expected_member: RuntimeBindingMemberRef, expected_target_layout_hash: str | None = None, expected_daemon_id: str | None = None, expected_daemon_session_id: str | None = None, @@ -1816,7 +1816,7 @@ def refill_owned_binding( source_policy: store_daemon_pb2.SourcePolicy | None = None, execution_topology: store_daemon_pb2.SourceExecutionTopology | None = None, collective_policy: store_daemon_pb2.CollectivePolicy | None = None, - serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, placement: store_daemon_pb2.TransformPlacement | None = None, operation_id: str | None = None, timeout_s: float = 600.0, @@ -1842,9 +1842,9 @@ def refill_owned_binding( request.execution_topology.CopyFrom(execution_topology) if collective_policy is not None: request.collective_policy = collective_policy - if serving_runtime_policy is not None: + if runtime_artifact_policy is not None: request.serving_artifact_policy.CopyFrom( - serving_runtime_policy.to_proto() + runtime_artifact_policy.to_proto() ) if placement is not None: request.placement = placement @@ -2407,7 +2407,7 @@ def materialize_by_artifact_id( placement: store_daemon_pb2.TransformPlacement | None = None, return_response: Literal[True], source_policy: store_daemon_pb2.SourcePolicy | None = None, - serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, export_policy: store_daemon_pb2.ExportPolicy | None = None, need_view_data_hash: bool = True, target_device_type: store_daemon_pb2.DeviceType = store_daemon_pb2.DeviceType.DEVICE_TYPE_GPU, @@ -2431,7 +2431,7 @@ def materialize_by_artifact_id( placement: store_daemon_pb2.TransformPlacement | None = None, return_response: Literal[False] = False, source_policy: store_daemon_pb2.SourcePolicy | None = None, - 
serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, export_policy: store_daemon_pb2.ExportPolicy | None = None, need_view_data_hash: bool = True, target_device_type: store_daemon_pb2.DeviceType = store_daemon_pb2.DeviceType.DEVICE_TYPE_GPU, @@ -2476,7 +2476,7 @@ def materialize_by_artifact_id( placement: store_daemon_pb2.TransformPlacement | None = None, return_response: bool = False, source_policy: store_daemon_pb2.SourcePolicy | None = None, - serving_runtime_policy: "ServingRuntimePolicy | None" = None, + runtime_artifact_policy: "RuntimeArtifactPolicy | None" = None, export_policy: store_daemon_pb2.ExportPolicy | None = None, need_view_data_hash: bool = True, target_device_type: store_daemon_pb2.DeviceType = store_daemon_pb2.DeviceType.DEVICE_TYPE_GPU, @@ -2527,9 +2527,9 @@ def materialize_by_artifact_id( if wait_for_shared_disk_ms: request.wait_for_shared_disk_ms = int(wait_for_shared_disk_ms) request.source_policy.CopyFrom(resolved_source_policy) - if serving_runtime_policy is not None: + if runtime_artifact_policy is not None: request.serving_artifact_policy.CopyFrom( - serving_runtime_policy.to_proto() + runtime_artifact_policy.to_proto() ) if export_policy is not None: request.export_policy = export_policy diff --git a/tensorcast/engine_adapter/adapter.py b/tensorcast/engine_adapter/adapter.py index 75d657c0..14ce9db6 100644 --- a/tensorcast/engine_adapter/adapter.py +++ b/tensorcast/engine_adapter/adapter.py @@ -20,10 +20,9 @@ from tensorcast.api.plan.transforms import TransformSpec from tensorcast.api.store import Artifact, Store from tensorcast.api.store.handles import RegisteredArtifact -from tensorcast.api.store.serving_builder import ( - RepresentationPublishSpec, +from tensorcast.api.store.publication_builder import ( build_pure_transform_publication_bundle_from_registered_artifact, - prepare_pure_transform_serving_registration, + prepare_pure_transform_runtime_registration, 
) from tensorcast.engine_adapter.artifact_api import ( BatchResult, @@ -33,7 +32,7 @@ PublishResult, SealedByteArtifact, ) -from tensorcast.types import ServingBuildIntent +from tensorcast.types import RepresentationPublishSpec, RuntimeArtifactBuildIntent def _encode_token(token: bytes) -> str: @@ -65,7 +64,7 @@ def _pure_transform_build_intent( ctx: "TransformContext", *, source_artifact: Artifact, -) -> ServingBuildIntent | None: +) -> RuntimeArtifactBuildIntent | None: publication_spec = ctx.spec.publication_spec if publication_spec is None: return None @@ -79,7 +78,7 @@ def _maybe_build_pure_transform_publication_bundle( registered_artifact: object, *, source_artifact: Artifact, - build_intent: ServingBuildIntent | None, + build_intent: RuntimeArtifactBuildIntent | None, ) -> RepresentationPublishSpec | None: if build_intent is None: return None @@ -620,7 +619,7 @@ def _register(ctx: TransformContext) -> object | None: status_code="INVALID_ARGUMENT", retryable=False, ) - prepared = prepare_pure_transform_serving_registration( + prepared = prepare_pure_transform_runtime_registration( build_intent=build_intent, source_artifact=selected_source, tensors=registration_tensors, diff --git a/tensorcast/node_agent/executor.py b/tensorcast/node_agent/executor.py index 73cc44c0..26fac6a8 100644 --- a/tensorcast/node_agent/executor.py +++ b/tensorcast/node_agent/executor.py @@ -27,7 +27,6 @@ from tensorcast.api.plan.transforms import TransformSpec from tensorcast.api.store import Artifact, Store from tensorcast.api.store.runtime import StoreRuntimeContext -from tensorcast.api.store.serving_builder import RepresentationPublishSpec from tensorcast.daemon_ctl import DaemonCtl, get_daemon_client from tensorcast.engine_adapter import ( BatchResult, @@ -44,8 +43,9 @@ from tensorcast.types import ( _SERVING_READINESS_FROM_PROTO, PrefetchRetentionPolicy, - ServingBindingSetTarget, - ServingBindingTarget, + RealizationTarget, + RealizationTargetSet, + RepresentationPublishSpec, ) 
ArtifactActionResult = ( @@ -1040,7 +1040,7 @@ def _prefetch( artifact, _ = self._artifact_from_selection(selection) readiness = _SERVING_READINESS_FROM_PROTO.get( int(action.requested_readiness), - "serving_local_ready", + "runtime_local_ready", ) retention = ( PrefetchRetentionPolicy.from_proto(action.retention_policy) @@ -1048,11 +1048,9 @@ def _prefetch( else None ) if serving_target_kind == "serving_binding_target": - target = ServingBindingTarget.from_proto( - action.serving_binding_target - ) + target = RealizationTarget.from_proto(action.serving_binding_target) else: - target = ServingBindingSetTarget.from_proto( + target = RealizationTargetSet.from_proto( action.serving_binding_set_target ) op = artifact.prefetch( diff --git a/tensorcast/node_agent/server.py b/tensorcast/node_agent/server.py index b57f0652..6a01eaaa 100644 --- a/tensorcast/node_agent/server.py +++ b/tensorcast/node_agent/server.py @@ -10,7 +10,6 @@ ArtifactSetResult, selection_identity_to_proto, ) -from tensorcast.api.store.serving_builder import RepresentationPublishSpec from tensorcast.engine_adapter import ( BatchOutcome, BatchResult, @@ -21,6 +20,7 @@ from tensorcast.node_agent.executor import NodeAgentExecutor from tensorcast.proto.node_agent.v1 import node_agent_pb2, node_agent_pb2_grpc from tensorcast.proto.plan.v1 import plan_pb2 +from tensorcast.types import RepresentationPublishSpec _STATE_MAP = { "pending": node_agent_pb2.OPERATION_STATE_PENDING, diff --git a/tensorcast/pytorch/module_binding.py b/tensorcast/pytorch/module_binding.py index cb75bd8c..c7b6cef7 100644 --- a/tensorcast/pytorch/module_binding.py +++ b/tensorcast/pytorch/module_binding.py @@ -10,7 +10,7 @@ import torch from torch import nn -import tensorcast.serving.contract as tc_contract +import tensorcast.artifact_runtime.contract as tc_contract _RESERVED_TENSORCAST_PREFIX = "__tensorcast_meta__." @@ -185,7 +185,7 @@ def align_runtime_binding_exclude_names( if len(missing) > 8: sample = f"{sample}, ..." 
raise RuntimeError( - "TensorCast serving artifact tensor names are missing from " + "TensorCast runtime artifact tensor names are missing from " f"the model: missing_count={len(missing)} [{sample}]" ) extra_excluded = sorted(all_names - canonical) @@ -211,7 +211,7 @@ def assert_runtime_tensors_match_expected_names( if len(unexpected) > 8: unexpected_sample = f"{unexpected_sample}, ..." raise RuntimeError( - "TensorCast serving artifact tensor set mismatch: " + "TensorCast runtime artifact tensor set mismatch: " f"missing_count={len(missing)} [{missing_sample}], " f"unexpected_count={len(unexpected)} [{unexpected_sample}]" ) diff --git a/tensorcast/pytorch/trace_capture.py b/tensorcast/pytorch/trace_capture.py index c6791493..40654549 100644 --- a/tensorcast/pytorch/trace_capture.py +++ b/tensorcast/pytorch/trace_capture.py @@ -14,7 +14,7 @@ from torch import nn from torch.utils._python_dispatch import TorchDispatchMode -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, diff --git a/tensorcast/retained_realization.py b/tensorcast/retained_realization.py new file mode 100644 index 00000000..71c8099b --- /dev/null +++ b/tensorcast/retained_realization.py @@ -0,0 +1,552 @@ +# Copyright (c) 2026, TensorCast Team. +"""Neutral retained realization claim helpers. + +Retained realization claims are serialized handoffs produced by artifact +prefetch. They expose the trusted reservation credit needed before framework +admission while keeping the existing retained binding authority validation as +the source of truth during the migration away from serving-rooted public names. 
+""" + +from __future__ import annotations + +import json +from collections.abc import Mapping +from dataclasses import dataclass +from typing import Any + +from tensorcast.api.errors import ArtifactError +from tensorcast.api.store.realization_kernel import ( + ArtifactRealizationHandle, + ArtifactRealizationSpec, +) +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, +) +from tensorcast.retained_realization_authority import ( + RetainedRealizationAuthority as RetainedRealizationAuthorityConfig, +) +from tensorcast.types import ( + BindingReservationCapability, + BindingValueRef, + GroupRealizationAcquireRef, + PrefetchHandoff, + RealizationTarget, + RuntimeBindingMemberRef, +) + + +@dataclass(frozen=True) +class RetainedRealizationExpectedDigests: + """Expected identity digests embedded in a retained realization claim.""" + + target_layout_hash: str + tensor_schema_hash: str + runtime_build_digest: str + resolved_spec_digest: str + + +@dataclass(frozen=True) +class RetainedRealizationClaim: + """Validated retained realization handoff for admission and acquire.""" + + _authority: ParsedRetainedRealizationAuthority + + @property + def group_id(self) -> str: + return self._authority.group_id + + @property + def local_ref(self) -> str | None: + return self._authority.local_serving_ref + + @property + def binding_value_ref(self) -> BindingValueRef: + return self._authority.binding_value_ref + + @property + def reservation_capability(self) -> BindingReservationCapability: + return self._authority.reservation_capability + + @property + def daemon_id(self) -> str: + return self._authority.daemon_id + + @property + def daemon_session_id(self) -> str: + return self._authority.daemon_session_id + + @property + def device_uuid(self) -> str: + return self._authority.device_uuid + + @property + def member(self) -> RuntimeBindingMemberRef: + return self._authority.member + + @property + def reservation_bytes(self) -> int: + return 
self._authority.reservation_bytes + + @property + def expected(self) -> RetainedRealizationExpectedDigests: + expected = self._authority.expected + return RetainedRealizationExpectedDigests( + target_layout_hash=expected.target_layout_hash, + tensor_schema_hash=expected.tensor_schema_hash, + runtime_build_digest=expected.runtime_build_digest, + resolved_spec_digest=expected.resolved_spec_digest, + ) + + @property + def readiness(self) -> str: + return self._authority.readiness + + @property + def verification_state(self) -> str: + return self._authority.verification_state + + @property + def serving_artifact_id(self) -> str | None: + return self._authority.serving_artifact_id + + @property + def group_realization_acquire(self) -> GroupRealizationAcquireRef | None: + return self._authority.group_realization_acquire + + @property + def authority(self) -> ParsedRetainedRealizationAuthority: + return self._authority + + def as_authority(self) -> ParsedRetainedRealizationAuthority: + return self._authority + + @staticmethod + def _request_facts( + spec: ArtifactRealizationSpec, + runtime_context: Any | None, + ) -> tuple[ArtifactRealizationSpec, Any]: + from tensorcast.artifact_runtime.request_facts import ( + ModelRuntimeRequestFactsError, + resolve_model_runtime_request_facts, + ) + + try: + facts = resolve_model_runtime_request_facts( + spec=spec, + runtime_context=runtime_context, + ) + except ModelRuntimeRequestFactsError as exc: + raise ArtifactError( + str(exc), + status_code="INVALID_ARGUMENT", + retryable=False, + ) from exc + return facts.spec, facts.context + + def realize_model_runtime( + self, + spec: ArtifactRealizationSpec, + *, + runtime_host: Any, + runtime_context: Any | None = None, + profile_sink: Any | None = None, + ) -> ArtifactRealizationHandle: + """Realize this retained claim as a model runtime attachment.""" + + if runtime_host is None: + raise ArtifactError( + "retained model_runtime realization requires runtime_host", + 
status_code="INVALID_ARGUMENT", + retryable=False, + ) + if spec.target_kind != "model_runtime": + raise ArtifactError( + "retained realization claim requires a model_runtime spec", + status_code="INVALID_ARGUMENT", + retryable=False, + ) + + from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration + + resolved_spec, context = self._request_facts(spec, runtime_context) + attachment = ArtifactRuntimeIntegration( + profile_sink=profile_sink, + host=runtime_host, + ).realize_retained_model_runtime( + authority=self._authority, + spec=resolved_spec, + context=context, + ) + handle = getattr(attachment.state, "model_runtime_handle", None) + if not isinstance(handle, ArtifactRealizationHandle): + raise ArtifactError( + "retained model_runtime realization completed without a " + "realization handle", + status_code="INTERNAL", + retryable=False, + ) + return handle + + +def parse_retained_realization_claim( + extra: Mapping[str, Any] | Any, + *, + expected_member: RuntimeBindingMemberRef | None = None, +) -> RetainedRealizationClaim: + """Parse and validate a retained realization claim from loader config.""" + + return RetainedRealizationClaim( + parse_retained_realization_authority( + extra, + expected_member=expected_member, + ) + ) + + +def parse_retained_realization_authority( + extra: Mapping[str, Any] | Any, + *, + expected_member: RuntimeBindingMemberRef | None = None, +) -> ParsedRetainedRealizationAuthority: + """Parse and validate retained realization authority from runtime config.""" + + from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig + + config = ( + extra + if isinstance(extra, TensorCastRuntimeConfig) + else TensorCastRuntimeConfig.from_mapping(extra) + ) + if config.retained_binding_acquire.mode != "external": + raise ValueError( + "TensorCast retained realization authority requires " + "retained_binding_acquire.mode='external' and " + "retained_binding_acquire.authority" + ) + authority_config = 
_select_retained_realization_authority_config( + config, + expected_member=expected_member, + ) + + binding_value_ref = _model_validate( + BindingValueRef, + authority_config.binding_value_ref, + field_name="retained_binding_acquire.authority.binding_value_ref", + ) + member = _model_validate( + RuntimeBindingMemberRef, + authority_config.member_ref, + field_name="retained_binding_acquire.authority.member_ref", + ) + capability_payload = _payload_to_dict( + authority_config.reservation_capability, + field_name="retained_binding_acquire.authority.reservation_capability", + ) + capability_payload.setdefault( + "binding_value_ref", binding_value_ref.model_dump(mode="python") + ) + capability_payload.setdefault("member", member.model_dump(mode="python")) + reservation_capability = _model_validate( + BindingReservationCapability, + capability_payload, + field_name="retained_binding_acquire.authority.reservation_capability", + ) + group_realization_acquire = None + if authority_config.group_realization_acquire is not None: + group_realization_acquire = _model_validate( + GroupRealizationAcquireRef, + authority_config.group_realization_acquire, + field_name="retained_binding_acquire.authority.group_realization_acquire", + ) + + authority = ParsedRetainedRealizationAuthority( + group_id=authority_config.group_id, + local_serving_ref=authority_config.local_serving_ref, + binding_value_ref=binding_value_ref, + reservation_capability=reservation_capability, + daemon_id=authority_config.daemon_id, + daemon_session_id=authority_config.daemon_session_id, + device_uuid=authority_config.device_uuid, + member=member, + reservation_bytes=int(authority_config.trusted_reservation_bytes), + expected=authority_config.expected, + readiness=authority_config.readiness, + verification_state=authority_config.verification_state or "local_only", + serving_artifact_id=authority_config.serving_artifact_id, + group_realization_acquire=group_realization_acquire, + ) + 
_validate_retained_realization_authority_consistency(authority) + if expected_member is not None and authority.member != expected_member: + raise ValueError( + "TensorCast retained realization authority member does not match " + f"expected member: authority={authority.member!r}, " + f"expected={expected_member!r}" + ) + return authority + + +def retained_realization_claim_mode(extra: Mapping[str, Any] | None) -> str: + """Return the retained claim acquire mode encoded in extra config.""" + + if extra is None or not isinstance(extra, Mapping): + return "disabled" + from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig + + return TensorCastRuntimeConfig.from_mapping(extra).retained_binding_acquire.mode + + +def retained_realization_trusted_reservation_bytes( + load_config_or_extra: Any, + *, + expected_member: RuntimeBindingMemberRef | None = None, +) -> int: + """Return trusted retained reservation bytes after full claim validation.""" + + extra = getattr( + load_config_or_extra, + "model_loader_extra_config", + load_config_or_extra, + ) + if extra is None or not isinstance(extra, Mapping): + return 0 + if retained_realization_claim_mode(extra) != "external": + return 0 + return parse_retained_realization_claim( + extra, + expected_member=expected_member, + ).reservation_bytes + + +def retained_realization_claim_extra_from_handoff( + *, + handoff: PrefetchHandoff, + target: RealizationTarget, + expected_member: RuntimeBindingMemberRef | None = None, +) -> dict[str, Any]: + """Build serialized retained claim config from a prefetch handoff.""" + + return _retained_realization_claim_extra( + authority=_retained_realization_authority_from_handoff( + handoff=handoff, + target=target, + expected_member=expected_member, + ), + config_key="retained_binding_acquire", + ) + + +def retained_realization_claim_extra_json_from_handoff( + *, + handoff: PrefetchHandoff, + target: RealizationTarget, + expected_member: RuntimeBindingMemberRef | None = None, +) -> 
str: + """Serialize retained claim config using stable JSON ordering.""" + + return json.dumps( + retained_realization_claim_extra_from_handoff( + handoff=handoff, + target=target, + expected_member=expected_member, + ), + sort_keys=True, + separators=(",", ":"), + ) + + +def _payload_to_dict(value: Any, *, field_name: str) -> dict[str, Any]: + if hasattr(value, "model_dump"): + return dict(value.model_dump(mode="python")) + if isinstance(value, Mapping): + return dict(value) + if isinstance(value, str): + try: + parsed = json.loads(value) + except json.JSONDecodeError as exc: + raise ValueError(f"{field_name} must be a JSON object") from exc + if not isinstance(parsed, Mapping): + raise ValueError(f"{field_name} must be a JSON object") + return dict(parsed) + raise ValueError(f"{field_name} must be a dict or JSON object") + + +def _model_validate(model_type: Any, value: Any, *, field_name: str) -> Any: + payload = _payload_to_dict(value, field_name=field_name) + try: + return model_type.model_validate(payload) + except Exception as exc: + raise ValueError( + f"{field_name} is invalid for TensorCast retained realization " + f"acquire: {exc}" + ) from exc + + +def _select_retained_realization_authority_config( + config: Any, + *, + expected_member: RuntimeBindingMemberRef | None = None, +) -> RetainedRealizationAuthorityConfig: + acquire_config = config.retained_binding_acquire + authority_config = acquire_config.authority + if authority_config is not None: + return authority_config + + authority_configs = tuple(acquire_config.authorities) + if not authority_configs: + raise ValueError( + "TensorCast retained realization authority requires " + "retained_binding_acquire.mode='external' and " + "retained_binding_acquire.authority or " + "retained_binding_acquire.authorities" + ) + if expected_member is None: + if len(authority_configs) == 1: + return authority_configs[0] + raise ValueError( + "TensorCast retained realization authority set requires an expected " + 
"serving member to select the worker authority" + ) + + for index, candidate in enumerate(authority_configs): + member = _model_validate( + RuntimeBindingMemberRef, + candidate.member_ref, + field_name=(f"retained_binding_acquire.authorities[{index}].member_ref"), + ) + if member == expected_member: + return candidate + raise ValueError( + "TensorCast retained realization authority set has no authority for " + f"expected member {expected_member!r}" + ) + + +def _validate_retained_realization_authority_consistency( + authority: ParsedRetainedRealizationAuthority, +) -> None: + capability = authority.reservation_capability + if capability.binding_value_ref != authority.binding_value_ref: + raise ValueError( + "retained_binding_acquire.authority.reservation_capability." + "binding_value_ref must match retained_binding_acquire.authority." + "binding_value_ref" + ) + if capability.daemon_id != authority.daemon_id: + raise ValueError( + "retained_binding_acquire.authority.reservation_capability." + "daemon_id mismatch" + ) + if capability.daemon_session_id != authority.daemon_session_id: + raise ValueError( + "retained_binding_acquire.authority.reservation_capability." + "daemon_session_id mismatch" + ) + if capability.device_uuid != authority.device_uuid: + raise ValueError( + "retained_binding_acquire.authority.reservation_capability." + "device_uuid mismatch" + ) + if capability.member != authority.member: + raise ValueError( + "retained_binding_acquire.authority.reservation_capability.member mismatch" + ) + if capability.reservation_bytes != authority.reservation_bytes: + raise ValueError( + "retained_binding_acquire.authority.reservation_capability." + "reservation_bytes must match retained_binding_acquire.authority." 
+ "trusted_reservation_bytes" + ) + if authority.member.group_id is not None and authority.member.group_id != ( + authority.group_id + ): + raise ValueError( + "retained_binding_acquire.authority.member_ref.group_id must match " + "retained_binding_acquire.authority.group_id" + ) + if ( + authority.readiness == "runtime_published_ready" + and not authority.serving_artifact_id + ): + raise ValueError( + "retained_binding_acquire.authority.serving_artifact_id is required " + "when retained_binding_acquire.authority.readiness=" + "'runtime_published_ready'" + ) + + +def _retained_realization_authority_from_handoff( + *, + handoff: PrefetchHandoff, + target: RealizationTarget, + expected_member: RuntimeBindingMemberRef | None = None, +) -> dict[str, Any]: + member = handoff.member + if expected_member is not None and member != expected_member: + raise ValueError( + "Prefetched retained realization member does not match expected " + f"placement: prefetched={member}, expected={expected_member}" + ) + authority: dict[str, Any] = { + "group_id": member.group_id or "", + "member_ref": _model_dump(member), + "daemon_id": handoff.daemon_id, + "daemon_session_id": handoff.daemon_session_id, + "device_uuid": handoff.device_uuid, + "binding_value_ref": _model_dump(handoff.binding_value_ref), + "reservation_capability": _model_dump(handoff.reservation_capability), + "local_serving_ref": handoff.local_serving_ref, + "readiness": str(getattr(handoff.readiness, "value", handoff.readiness)), + "verification_state": str( + getattr( + handoff.verification_state, + "value", + handoff.verification_state, + ) + ), + "serving_artifact_id": handoff.serving_artifact_id, + "trusted_reservation_bytes": handoff.reservation_bytes, + "expected": { + "target_layout_hash": target.resolved_layout.target_layout_hash, + "tensor_schema_hash": target.resolved_layout.tensor_schema_hash, + "runtime_build_digest": target.runtime_build_digest, + "resolved_spec_digest": target.resolved_layout.spec_digest, + 
}, + } + if handoff.group_realization_acquire is not None: + authority["group_realization_acquire"] = _model_dump( + handoff.group_realization_acquire + ) + return authority + + +def _retained_realization_claim_extra( + *, + authority: dict[str, Any], + config_key: str, +) -> dict[str, Any]: + return { + config_key: { + "mode": "external", + "authority": authority, + }, + } + + +def _model_dump(value: Any) -> dict[str, Any]: + if hasattr(value, "model_dump"): + return dict(value.model_dump(mode="python")) + if isinstance(value, Mapping): + return dict(value) + raise TypeError(f"Cannot serialize {type(value)!r}") + + +__all__ = [ + "RetainedRealizationClaim", + "RetainedRealizationExpectedDigests", + "parse_retained_realization_authority", + "parse_retained_realization_claim", + "retained_realization_claim_mode", + "retained_realization_trusted_reservation_bytes", + "retained_realization_claim_extra_from_handoff", + "retained_realization_claim_extra_json_from_handoff", +] diff --git a/tensorcast/retained_realization_authority.py b/tensorcast/retained_realization_authority.py new file mode 100644 index 00000000..66dd93ba --- /dev/null +++ b/tensorcast/retained_realization_authority.py @@ -0,0 +1,148 @@ +# Copyright (c) 2026, TensorCast Team. 
+ +"""Typed retained realization authority models.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +from tensorcast.types import ( + BindingReservationCapability, + BindingValueRef, + GroupRealizationAcquireRef, + RuntimeBindingMemberRef, +) + +_READINESS_STATES = { + "runtime_reserved", + "runtime_local_ready", + "runtime_published_ready", +} + + +def _normalize_optional_text(value: Any) -> str | None: + if value is None: + return None + normalized = str(value).strip() + return normalized or None + + +def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: + normalized = str(value).strip().lower() + if normalized not in allowed: + raise ValueError( + f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" + ) + return normalized + + +class RetainedRealizationExpectedDigests(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + target_layout_hash: str + tensor_schema_hash: str + runtime_build_digest: str + resolved_spec_digest: str + + @field_validator( + "target_layout_hash", + "tensor_schema_hash", + "runtime_build_digest", + "resolved_spec_digest", + mode="before", + ) + @classmethod + def _normalize_required_text(cls, value: Any) -> str: + normalized = _normalize_optional_text(value) + if normalized is None: + raise ValueError("expected digest fields must be non-empty") + return normalized + + +class RetainedRealizationAuthority(BaseModel): + model_config = ConfigDict(frozen=True, extra="forbid") + + group_id: str + member_ref: dict[str, Any] + daemon_id: str + daemon_session_id: str + device_uuid: str + binding_value_ref: dict[str, Any] + reservation_capability: dict[str, Any] + group_realization_acquire: dict[str, Any] | None = None + local_serving_ref: str | None = None + readiness: str + verification_state: str = "local_only" + serving_artifact_id: str | None = None 
+ trusted_reservation_bytes: int = Field(ge=0) + expected: RetainedRealizationExpectedDigests + + @field_validator( + "group_id", + "daemon_id", + "daemon_session_id", + "device_uuid", + mode="before", + ) + @classmethod + def _normalize_required_text(cls, value: Any) -> str: + normalized = _normalize_optional_text(value) + if normalized is None: + raise ValueError("retained binding authority text fields required") + return normalized + + @field_validator( + "local_serving_ref", + "verification_state", + "serving_artifact_id", + mode="before", + ) + @classmethod + def _normalize_optional_fields(cls, value: Any) -> Any: + return _normalize_optional_text(value) + + @field_validator("readiness", mode="before") + @classmethod + def _normalize_readiness(cls, value: Any) -> str: + return _normalize_enum( + value, + allowed=_READINESS_STATES, + field_name="retained_binding_acquire.authority.readiness", + ) + + @model_validator(mode="after") + def _validate_published_ready(self) -> RetainedRealizationAuthority: + if self.readiness == "runtime_published_ready" and not self.serving_artifact_id: + raise ValueError( + "retained_binding_acquire.authority.serving_artifact_id is required when " + "readiness='runtime_published_ready'" + ) + return self + + +@dataclass(frozen=True) +class ParsedRetainedRealizationAuthority: + group_id: str + local_serving_ref: str | None + binding_value_ref: BindingValueRef + reservation_capability: BindingReservationCapability + daemon_id: str + daemon_session_id: str + device_uuid: str + member: RuntimeBindingMemberRef + reservation_bytes: int + expected: RetainedRealizationExpectedDigests + readiness: str + verification_state: str + serving_artifact_id: str | None = None + group_realization_acquire: GroupRealizationAcquireRef | None = None + + +__all__ = [ + "ParsedRetainedRealizationAuthority", + "RetainedRealizationAuthority", + "RetainedRealizationExpectedDigests", +] diff --git a/tensorcast/serving/__init__.py 
b/tensorcast/serving/__init__.py deleted file mode 100644 index 9b93f041..00000000 --- a/tensorcast/serving/__init__.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. -"""Curated public facade for TensorCast serving integrations.""" - -from tensorcast.serving.binding_plan import ServingBindingPlan -from tensorcast.serving.config import ( - ArtifactBindStartPlan, - BootstrapSettings, - DiagnosticsSettings, - MaterializationSettings, - ReplicaPublicationPolicy, - RetainedBindingAcquireStartPlan, - ServingConfig, - ServingSettings, - ServingStartPlan, - ServingStartPlanError, - SourceBootstrapToBindingStartPlan, - plan_serving_start, -) -from tensorcast.serving.errors import ( - AdmissionRejectedError, - ArtifactLocatorResolutionError, - AttachFinalizeError, - AuthorityValidationError, - CapabilityMissingError, - ConfigConflictError, - OwnershipTransferError, - PlacementAdmissionError, - PolicyMismatchError, - PublicationRequiredError, - ReplicaPublicationError, - RuntimeSwapError, - SchemaMismatchError, - ServingIntegrationError, - SourceProviderError, - TensorCastServingRuntimeError, -) -from tensorcast.serving.hosts import ( - PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION, - PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION, - RECIPE_CACHE_POLICY_SCHEMA_VERSION, - SOURCE_CATALOG_REQUEST_SCHEMA_VERSION, - SOURCE_CATALOG_SCHEMA_VERSION, - SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION, - AdmissionDecision, - AdmissionPolicy, - AdmissionRequest, - CollectiveHost, - DefaultAdmissionPolicy, - FinalizeHookHost, - FinalizePhase, - FinalizePolicy, - FrameworkHost, - FrameworkIdentity, - IntegrationHost, - MaterializationExecutionFacts, - NativeLoadHost, - ObservabilitySink, - PlacementAdmissionFacts, - PlacementHost, - PlacementIdentityFacts, - PlacementMemberFacts, - RecipeCachePolicy, - RecipeTraceHost, - SourceCatalogProvider, - SourceCatalogRequest, - SourceDownloadPolicy, - SourceHost, - SourceSelector, - TensorCastEvent, - TensorSurfaceHost, - TorchTensorHost, - 
semantic_placement_digest, - serving_placement_from_framework_facts, -) -from tensorcast.serving.policy import ( - SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION, - SERVING_POLICY_SCHEMA_VERSION, - ServingArtifactLocator, - ServingPolicy, - merge_serving_reload_extra_config, - normalize_serving_reload_request_payload, - ranked_version_key_for_member, -) -from tensorcast.serving.retained_binding import RetainedBindingAcquireSettings -from tensorcast.serving.runtime_attachment import RuntimeAttachment -from tensorcast.serving.runtime_config import ( - DEFAULT_RUNTIME_PROFILE, - RuntimeConfigProfile, - RuntimeDaemonSettings, - RuntimeGlobalStoreSettings, - RuntimeSettings, - resolve_runtime_config_profile, -) -from tensorcast.serving.runtime_intent import ( - BootstrapPolicy, - ExistingServingArtifact, - LocalSourceBootstrap, - RequestContext, - RetainedBindingAcquire, -) -from tensorcast.serving.runtime_view import RuntimeWorkerView - -__all__ = [ - "AdmissionDecision", - "AdmissionPolicy", - "AdmissionRejectedError", - "AdmissionRequest", - "ArtifactBindStartPlan", - "ArtifactLocatorResolutionError", - "AttachFinalizeError", - "AuthorityValidationError", - "BootstrapPolicy", - "BootstrapSettings", - "CapabilityMissingError", - "CollectiveHost", - "ConfigConflictError", - "DEFAULT_RUNTIME_PROFILE", - "DefaultAdmissionPolicy", - "DiagnosticsSettings", - "ExistingServingArtifact", - "FinalizeHookHost", - "FinalizePhase", - "FinalizePolicy", - "FrameworkHost", - "FrameworkIdentity", - "IntegrationHost", - "LocalSourceBootstrap", - "MaterializationExecutionFacts", - "MaterializationSettings", - "NativeLoadHost", - "ObservabilitySink", - "OwnershipTransferError", - "PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION", - "PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION", - "PlacementAdmissionError", - "PlacementAdmissionFacts", - "PlacementHost", - "PlacementIdentityFacts", - "PlacementMemberFacts", - "PolicyMismatchError", - "PublicationRequiredError", - "RECIPE_CACHE_POLICY_SCHEMA_VERSION", - 
"ReplicaPublicationError", - "ReplicaPublicationPolicy", - "RecipeCachePolicy", - "RecipeTraceHost", - "RequestContext", - "RetainedBindingAcquire", - "RetainedBindingAcquireSettings", - "RetainedBindingAcquireStartPlan", - "RuntimeAttachment", - "RuntimeConfigProfile", - "RuntimeDaemonSettings", - "RuntimeGlobalStoreSettings", - "RuntimeSettings", - "RuntimeSwapError", - "RuntimeWorkerView", - "SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION", - "SERVING_POLICY_SCHEMA_VERSION", - "SOURCE_CATALOG_REQUEST_SCHEMA_VERSION", - "SOURCE_CATALOG_SCHEMA_VERSION", - "SOURCE_DOWNLOAD_POLICY_SCHEMA_VERSION", - "SchemaMismatchError", - "ServingArtifactLocator", - "ServingBindingPlan", - "ServingConfig", - "ServingIntegrationError", - "ServingPolicy", - "ServingSettings", - "ServingStartPlan", - "ServingStartPlanError", - "SourceBootstrapToBindingStartPlan", - "SourceCatalogProvider", - "SourceCatalogRequest", - "SourceDownloadPolicy", - "SourceHost", - "SourceProviderError", - "SourceSelector", - "TensorCastEvent", - "TensorCastServingRuntimeError", - "TensorSurfaceHost", - "TorchTensorHost", - "merge_serving_reload_extra_config", - "normalize_serving_reload_request_payload", - "plan_serving_start", - "ranked_version_key_for_member", - "resolve_runtime_config_profile", - "semantic_placement_digest", - "serving_placement_from_framework_facts", -] diff --git a/tensorcast/serving/_runtime_impl/__init__.py b/tensorcast/serving/_runtime_impl/__init__.py deleted file mode 100644 index a72f7800..00000000 --- a/tensorcast/serving/_runtime_impl/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. -"""Private serving runtime lifecycle implementation package.""" diff --git a/tensorcast/serving/artifact_manifest.py b/tensorcast/serving/artifact_manifest.py deleted file mode 100644 index 8cbde919..00000000 --- a/tensorcast/serving/artifact_manifest.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
-"""Serving artifact manifest parse and validation helpers.""" - -from __future__ import annotations - -from typing import Any - -import torch - -import tensorcast as tc - -SERVING_ARTIFACT_SCHEMA_VERSION = int( - tc.ServingArtifactManifest.model_fields["schema_version"].default -) -SERVING_MANIFEST_TENSOR_NAME = tc.SERVING_MANIFEST_TENSOR_NAME - - -def serving_manifest_from_tensor_bytes( - data: bytes | bytearray, -) -> tc.ServingArtifactManifest: - return tc.ServingArtifactManifest.from_bytes(bytes(data)) - - -def read_serving_artifact_manifest_tensor( - artifact: Any, - *, - artifact_ref: str, - manifest_tensor_name: str = SERVING_MANIFEST_TENSOR_NAME, -) -> tc.ServingArtifactManifest: - subset = artifact.subset([manifest_tensor_name]) - try: - manifest_tensor = subset.tensor_dict(device="cpu")[manifest_tensor_name] - except Exception as cpu_exc: - try: - cuda_device = torch.device("cuda", torch.cuda.current_device()) - manifest_tensor = subset.tensor_dict(device=cuda_device)[ - manifest_tensor_name - ] - except Exception as cuda_exc: - raise RuntimeError( - f"Failed to materialize serving manifest from '{artifact_ref}' " - f"(cpu_error={cpu_exc!r}; cuda_error={cuda_exc!r})" - ) from cuda_exc - if manifest_tensor.dtype != torch.uint8 or manifest_tensor.dim() != 1: - raise RuntimeError("TensorCast serving manifest tensor must be 1D torch.uint8") - return serving_manifest_from_tensor_bytes( - bytes(manifest_tensor.detach().cpu().tolist()) - ) - - -def cross_check_serving_artifact_manifest( - *, - manifest: Any | None, - descriptor_tensor_schema_hash: str, - tensor_names: tuple[str, ...], - expected_tensor_schema_hash: str, - serving_runtime_policy: tc.ServingRuntimePolicy | None = None, - expected_schema_version: int = SERVING_ARTIFACT_SCHEMA_VERSION, -) -> Any: - if manifest is None: - raise RuntimeError("TensorCast serving artifact manifest is missing") - if manifest.schema_version != expected_schema_version: - raise RuntimeError( - "TensorCast serving artifact 
schema version mismatch: " - f"{manifest.schema_version} != {expected_schema_version}" - ) - if manifest.artifact_kind != "serving": - raise RuntimeError( - f"TensorCast artifact is not a serving artifact: {manifest.artifact_kind}" - ) - if ( - serving_runtime_policy is not None - and serving_runtime_policy.serving_manifest_ref is not None - and manifest.serving_manifest_ref != serving_runtime_policy.serving_manifest_ref - ): - raise RuntimeError("TensorCast serving artifact manifest ref mismatch") - if ( - serving_runtime_policy is not None - and serving_runtime_policy.expected_representation_contract_hash is not None - and manifest.representation_contract_hash - != serving_runtime_policy.expected_representation_contract_hash - ): - raise RuntimeError( - "TensorCast serving artifact representation contract mismatch" - ) - if ( - serving_runtime_policy is not None - and serving_runtime_policy.expected_serving_build_digest is not None - and manifest.serving_build_digest - != serving_runtime_policy.expected_serving_build_digest - ): - raise RuntimeError("TensorCast serving artifact build digest mismatch") - if ( - serving_runtime_policy is not None - and getattr( - serving_runtime_policy, - "expected_topology_admission_digest", - None, - ) - is not None - and getattr(manifest, "topology_admission_digest", None) - != serving_runtime_policy.expected_topology_admission_digest - ): - raise RuntimeError( - "TensorCast serving artifact topology admission digest mismatch" - ) - if manifest.tensor_schema_hash != expected_tensor_schema_hash: - raise RuntimeError( - "TensorCast serving artifact tensor schema hash mismatch: " - f"manifest={manifest.tensor_schema_hash}, " - f"expected={expected_tensor_schema_hash}" - ) - if descriptor_tensor_schema_hash != expected_tensor_schema_hash: - raise RuntimeError( - "TensorCast serving artifact descriptor schema hash mismatch: " - f"descriptor={descriptor_tensor_schema_hash}, " - f"expected={expected_tensor_schema_hash}" - ) - if 
manifest.canonical_tensor_count != len(tensor_names): - raise RuntimeError("TensorCast serving artifact tensor count mismatch") - return manifest - - -__all__ = [ - "SERVING_ARTIFACT_SCHEMA_VERSION", - "SERVING_MANIFEST_TENSOR_NAME", - "cross_check_serving_artifact_manifest", - "read_serving_artifact_manifest_tensor", - "serving_manifest_from_tensor_bytes", -] diff --git a/tensorcast/serving/builder/__init__.py b/tensorcast/serving/builder/__init__.py deleted file mode 100644 index d95ef31b..00000000 --- a/tensorcast/serving/builder/__init__.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. -"""Serving builder primitives shared by framework integrations.""" - -from tensorcast.serving.binding_plan import ServingBindingPlan -from tensorcast.serving.builder.binding_plan import ( - TargetShapes, - lower_trace_plan_for_binding, - lower_trace_plan_for_realization, - range_spec_to_tensorcast_ranges, -) -from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, - RecipeCompileInputs, - SemanticValidationSpec, - ServingBuildObserver, - ServingFacts, - SourceHullEntry, - TensorcastSemanticValidationSpec, - TensorcastServingFacts, - TensorSchemaEntry, - compile_serving_recipe, - compute_recipe_compile_key, - filter_tensor_schema_for_trace_plan, -) -from tensorcast.serving.builder.materialization import ( - BindingFinalizeMaterializationResult, - allocate_tensors_from_schema, - apply_copy_plan, - collect_serving_tensors_from_model, - dtype_from_string, - iter_ranges, - load_source_tensors_for_recipe, - materialize_binding_finalize_serving_tensors, - materialize_pure_transform_serving_tensors, - materialize_recipe_copy_plan_tensors, - narrow_by_range_spec, - narrow_source_view, - run_binding_finalize_semantic_validation, - tensorcast_view_slices_from_trace_plan, - update_dst_coverage, - validate_binding_finalize_tensor_schema, - validate_dst_coverage, - validate_source_tensor_names, -) -from tensorcast.serving.builder.publication import 
( - RecipePublicationContext, - build_binding_finalize_admission_facts_from_context, - build_binding_finalize_build_intent, - build_binding_finalize_publication_bundle_from_context, - build_pure_transform_build_intent, - build_pure_transform_publication_bundle_from_context, - build_pure_transform_publication_spec_from_context, - build_recipe_serving_build_intent, - complete_pure_transform_recipe_publication, - prepare_binding_finalize_serving_registration_from_context, - prepare_pure_transform_serving_registration_from_context, -) -from tensorcast.serving.builder.recipe_cache import ( - RECIPE_CACHE_PAYLOAD_VERSION, - compiled_recipe_from_dict, - compiled_recipe_to_dict, - load_compiled_recipe_cache, - write_compiled_recipe_cache, -) -from tensorcast.serving.builder.recipe_validation import ( - serving_support_level_at_least, - serving_support_level_display_name, - validate_recipe_for_builder_mode, -) -from tensorcast.serving.builder.semantic_validation import ( - evaluate_semantic_validation_spec, -) -from tensorcast.serving.builder.tensor_parity import ( - TensorParityMismatch, - TensorParityProbe, - TensorParityReport, - build_tensor_parity_probes_from_realization_plan, - build_tensor_parity_probes_from_realization_plan_proto, - build_tensor_parity_probes_from_recipe, - build_tensor_parity_probes_from_trace_plan, - evaluate_recipe_tensor_parity, - evaluate_tensor_parity_probes, -) -from tensorcast.serving.builder.tensor_schema import ( - validate_tensor_schema_against_tensors, -) -from tensorcast.serving.builder.trace_cache import ( - TRACE_PLAN_CACHE_PAYLOAD_VERSION, - dump_trace_plan_debug, - load_trace_plan_cache, - trace_plan_debug_payload, - write_trace_plan_cache, -) -from tensorcast.serving.builder.trace_ir import ( - CopyPlanEntry, - MultiRange, - Range, - RangeSpec, - TracePlan, - copy_plan_from_dict, - copy_plan_to_dict, - range_from_dict, - range_to_dict, - single_range_from_dict, - trace_plan_from_dict, - trace_plan_to_dict, -) -from 
tensorcast.serving.source_catalog import ( - SourceCatalog, - SourceFileEntry, - SourceManifest, - SourceTensorMeta, - compute_source_metadata_fingerprint, - resolve_source_artifact_ref, - source_catalog_from_all_safetensors_dir, - source_catalog_from_canonical_index, - source_catalog_from_manifest, - source_catalog_from_selected_safetensors, -) - -__all__ = [ - "BindingFinalizeMaterializationResult", - "CompiledServingRecipe", - "CopyPlanEntry", - "MultiRange", - "Range", - "RangeSpec", - "RecipeCompileInputs", - "RecipePublicationContext", - "RECIPE_CACHE_PAYLOAD_VERSION", - "SemanticValidationSpec", - "SourceCatalog", - "SourceFileEntry", - "SourceManifest", - "SourceHullEntry", - "SourceTensorMeta", - "ServingBuildObserver", - "ServingBindingPlan", - "ServingFacts", - "TargetShapes", - "TRACE_PLAN_CACHE_PAYLOAD_VERSION", - "TracePlan", - "TensorSchemaEntry", - "TensorParityMismatch", - "TensorParityProbe", - "TensorParityReport", - "TensorcastSemanticValidationSpec", - "TensorcastServingFacts", - "allocate_tensors_from_schema", - "apply_copy_plan", - "build_binding_finalize_admission_facts_from_context", - "build_binding_finalize_build_intent", - "build_binding_finalize_publication_bundle_from_context", - "build_pure_transform_build_intent", - "build_pure_transform_publication_bundle_from_context", - "build_pure_transform_publication_spec_from_context", - "build_recipe_serving_build_intent", - "build_tensor_parity_probes_from_realization_plan", - "build_tensor_parity_probes_from_realization_plan_proto", - "build_tensor_parity_probes_from_recipe", - "build_tensor_parity_probes_from_trace_plan", - "collect_serving_tensors_from_model", - "compile_serving_recipe", - "complete_pure_transform_recipe_publication", - "compute_source_metadata_fingerprint", - "compute_recipe_compile_key", - "copy_plan_from_dict", - "copy_plan_to_dict", - "compiled_recipe_from_dict", - "compiled_recipe_to_dict", - "dump_trace_plan_debug", - "dtype_from_string", - 
"evaluate_semantic_validation_spec", - "evaluate_recipe_tensor_parity", - "evaluate_tensor_parity_probes", - "iter_ranges", - "load_source_tensors_for_recipe", - "lower_trace_plan_for_binding", - "lower_trace_plan_for_realization", - "materialize_binding_finalize_serving_tensors", - "materialize_pure_transform_serving_tensors", - "materialize_recipe_copy_plan_tensors", - "narrow_by_range_spec", - "narrow_source_view", - "prepare_binding_finalize_serving_registration_from_context", - "prepare_pure_transform_serving_registration_from_context", - "range_spec_to_tensorcast_ranges", - "range_from_dict", - "range_to_dict", - "filter_tensor_schema_for_trace_plan", - "resolve_source_artifact_ref", - "run_binding_finalize_semantic_validation", - "single_range_from_dict", - "source_catalog_from_all_safetensors_dir", - "source_catalog_from_canonical_index", - "source_catalog_from_manifest", - "source_catalog_from_selected_safetensors", - "tensorcast_view_slices_from_trace_plan", - "trace_plan_debug_payload", - "trace_plan_from_dict", - "trace_plan_to_dict", - "update_dst_coverage", - "serving_support_level_at_least", - "serving_support_level_display_name", - "validate_binding_finalize_tensor_schema", - "validate_dst_coverage", - "validate_recipe_for_builder_mode", - "validate_source_tensor_names", - "validate_tensor_schema_against_tensors", - "load_trace_plan_cache", - "load_compiled_recipe_cache", - "write_trace_plan_cache", - "write_compiled_recipe_cache", -] diff --git a/tensorcast/serving/config.py b/tensorcast/serving/config.py deleted file mode 100644 index 865220d1..00000000 --- a/tensorcast/serving/config.py +++ /dev/null @@ -1,338 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
- -"""Public TensorCast serving artifact runtime configuration schema.""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any, Mapping - -from pydantic import BaseModel, ConfigDict, Field, field_validator - -from tensorcast.serving.policy import ServingArtifactLocator, ServingPolicy -from tensorcast.serving.retained_binding import RetainedBindingAcquireSettings -from tensorcast.serving.runtime_config import RuntimeSettings - -_BOOTSTRAP_MODES = {"disabled", "auto", "required"} -_COLLECTIVE_MODES = {"auto", "required", "disabled"} -_REPLICA_PUBLICATION_MODES = {"disabled", "optional", "required"} -_REPLICA_PUBLICATION_TRIGGERS = {"after_vllm_ready"} -_TOP_LEVEL_KEYS = { - "runtime", - "serving", - "bootstrap", - "materialization", - "retained_binding_acquire", - "diagnostics", - "replica_publication", -} - - -def _normalize_optional_text(value: Any) -> str | None: - if value is None: - return None - normalized = str(value).strip() - return normalized or None - - -def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: - normalized = str(value).strip().lower() - if normalized not in allowed: - raise ValueError( - f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" - ) - return normalized - - -class ServingSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - artifact_locator: ServingArtifactLocator | None = None - policy: ServingPolicy = ServingPolicy() - - -class BootstrapSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - mode: str = "auto" - cache_dir: str | None = None - verify_source_checksums: bool = True - - @field_validator("mode", mode="before") - @classmethod - def _normalize_mode(cls, value: Any) -> str: - if value is None: - return "auto" - return _normalize_enum( - value, - allowed=_BOOTSTRAP_MODES, - field_name="bootstrap.mode", - ) - - @field_validator("cache_dir", mode="before") - @classmethod - def 
_normalize_optional_fields(cls, value: Any) -> Any: - return _normalize_optional_text(value) - - -class MaterializationSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - collective: str = "auto" - - @field_validator("collective", mode="before") - @classmethod - def _normalize_collective(cls, value: Any) -> str: - if value is None: - return "auto" - return _normalize_enum( - value, - allowed=_COLLECTIVE_MODES, - field_name="materialization.collective", - ) - - def collective_policy_value(self) -> str: - return { - "auto": "collective_first", - "required": "require_collective", - "disabled": "disable_collective", - }[self.collective] - - -class DiagnosticsSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - debug_path: str | None = None - verify_tensors: bool = False - - @field_validator("debug_path", mode="before") - @classmethod - def _normalize_debug_path(cls, value: Any) -> Any: - return _normalize_optional_text(value) - - -class ReplicaPublicationPolicy(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - mode: str = "disabled" - trigger: str = "after_vllm_ready" - async_publish: bool = True - timeout_s: float = 30.0 - ttl_ms: int | None = None - drain_timeout_s: float = 30.0 - - @field_validator("mode", mode="before") - @classmethod - def _normalize_mode(cls, value: Any) -> str: - if value is None: - return "disabled" - return _normalize_enum( - value, - allowed=_REPLICA_PUBLICATION_MODES, - field_name="replica_publication.mode", - ) - - @field_validator("trigger", mode="before") - @classmethod - def _normalize_trigger(cls, value: Any) -> str: - if value is None: - return "after_vllm_ready" - return _normalize_enum( - value, - allowed=_REPLICA_PUBLICATION_TRIGGERS, - field_name="replica_publication.trigger", - ) - - @field_validator("async_publish") - @classmethod - def _validate_async_publish(cls, value: bool) -> bool: - if not value: - raise 
ValueError("replica_publication.async_publish=false is not supported") - return value - - @field_validator("timeout_s", "drain_timeout_s") - @classmethod - def _validate_positive_timeout(cls, value: float) -> float: - normalized = float(value) - if normalized <= 0: - raise ValueError("replica_publication timeouts must be positive") - return normalized - - @field_validator("ttl_ms") - @classmethod - def _reject_ttl(cls, value: int | None) -> int | None: - if value is not None: - raise ValueError("replica_publication.ttl_ms is not supported yet") - return value - - -class ServingConfig(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - runtime: RuntimeSettings = RuntimeSettings() - serving: ServingSettings = ServingSettings() - bootstrap: BootstrapSettings = BootstrapSettings() - materialization: MaterializationSettings = MaterializationSettings() - retained_binding_acquire: RetainedBindingAcquireSettings = Field( - default_factory=RetainedBindingAcquireSettings, - ) - diagnostics: DiagnosticsSettings = DiagnosticsSettings() - replica_publication: ReplicaPublicationPolicy = ReplicaPublicationPolicy() - - @classmethod - def from_mapping(cls, data: Mapping[str, Any] | None) -> ServingConfig: - payload: Mapping[str, Any] = {} if data is None else data - if not isinstance(payload, Mapping): - raise ValueError("model_loader_extra_config must be a mapping") - unknown = {str(key) for key in payload if str(key) not in _TOP_LEVEL_KEYS} - if unknown: - raise ValueError( - "Unexpected TensorCast serving config keys in " - "model_loader_extra_config: " - f"{sorted(unknown)}" - ) - serving = payload.get("serving") - if isinstance(serving, Mapping) and "selector" in serving: - raise ValueError( - "serving.selector is not supported; use serving.artifact_locator" - ) - return cls.model_validate(dict(payload)) - - def to_mapping(self) -> dict[str, Any]: - return self.model_dump(mode="python") - - -class ServingStartPlanError(ValueError): - """Startup 
configuration cannot be lowered into one serving plan.""" - - -@dataclass(frozen=True) -class ServingStartPlan: - """Typed serving startup intent selected before runtime allocation.""" - - kind: str = field(init=False) - - -@dataclass(frozen=True) -class ArtifactBindStartPlan(ServingStartPlan): - """Bind a durable serving artifact selected by an artifact locator.""" - - artifact_locator: ServingArtifactLocator - policy: ServingPolicy - kind: str = field(default="artifact_bind", init=False) - - -@dataclass(frozen=True) -class SourceBootstrapToBindingStartPlan(ServingStartPlan): - """Bootstrap a source artifact into a daemon-owned binding value.""" - - source_selector: Any - bootstrap_policy: BootstrapSettings - kind: str = field(default="source_bootstrap_to_binding", init=False) - - -@dataclass(frozen=True) -class RetainedBindingAcquireStartPlan(ServingStartPlan): - """Acquire a retained binding authority prepared by artifact prefetch.""" - - authority: Any - kind: str = field(default="retained_binding_acquire", init=False) - - -def _candidate_rejection_reasons( - *, - has_retained_authority: bool, - has_artifact_locator: bool, - has_source_selector: bool, - bootstrap_mode: str, -) -> dict[str, str]: - source_reason = ( - "bootstrap.mode is disabled" - if bootstrap_mode == "disabled" - else "source selector is unavailable" - ) - return { - "retained_binding_acquire": ( - "selected" - if has_retained_authority - else "retained_binding_acquire.mode is not external" - ), - "artifact_bind": ( - "selected" if has_artifact_locator else "serving artifact locator missing" - ), - "source_bootstrap_to_binding": ( - "selected" - if has_source_selector and bootstrap_mode in {"auto", "required"} - else source_reason - ), - } - - -def _format_rejection_reasons(reasons: Mapping[str, str]) -> str: - return "; ".join(f"{name}: {reason}" for name, reason in reasons.items()) - - -def plan_serving_start( - *, - config: ServingConfig, - source_selector: Any | None, - expected_member: 
Any | None = None, -) -> ServingStartPlan: - """Classify serving startup into exactly one canonical start plan.""" - - retained_requested = config.retained_binding_acquire.mode == "external" - artifact_locator = config.serving.artifact_locator - has_artifact_locator = artifact_locator is not None - bootstrap_mode = config.bootstrap.mode - has_source_selector = source_selector is not None - - if retained_requested and has_artifact_locator: - raise ServingStartPlanError( - "TensorCast serving config cannot request both retained binding " - "acquire and durable serving artifact bind" - ) - if bootstrap_mode == "required" and (retained_requested or has_artifact_locator): - raise ServingStartPlanError( - "TensorCast bootstrap.mode='required' is mutually exclusive with " - "retained binding acquire and durable serving artifact bind" - ) - if bootstrap_mode == "disabled" and not ( - retained_requested or has_artifact_locator - ): - raise ServingStartPlanError( - "TensorCast bootstrap.mode='disabled' requires retained binding " - "authority or durable serving artifact locator" - ) - - if retained_requested: - from tensorcast.serving.retained_binding import ( - parse_retained_serving_binding_authority, - ) - - return RetainedBindingAcquireStartPlan( - authority=parse_retained_serving_binding_authority( - config, - expected_member=expected_member, - ) - ) - if artifact_locator is not None: - return ArtifactBindStartPlan( - artifact_locator=artifact_locator, - policy=config.serving.policy, - ) - if bootstrap_mode in {"auto", "required"} and source_selector is not None: - return SourceBootstrapToBindingStartPlan( - source_selector=source_selector, - bootstrap_policy=config.bootstrap, - ) - - reasons = _candidate_rejection_reasons( - has_retained_authority=retained_requested, - has_artifact_locator=has_artifact_locator, - has_source_selector=has_source_selector, - bootstrap_mode=bootstrap_mode, - ) - raise ServingStartPlanError( - "TensorCast serving config did not resolve to 
one startup plan; " - f"rejected candidates: {_format_rejection_reasons(reasons)}" - ) diff --git a/tensorcast/serving/contract.py b/tensorcast/serving/contract.py deleted file mode 100644 index 09e21f81..00000000 --- a/tensorcast/serving/contract.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. - -"""Canonical serving runtime identity and hash helpers.""" - -from __future__ import annotations - -import json -from collections.abc import Mapping, Sequence -from dataclasses import dataclass - -import torch - -from tensorcast.api.store.serving_builder import ( - _hash_versioned_payload_to_multihash, - _normalize_logical_topology_payload, - compute_serving_tensor_schema_hash, -) -from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry -from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef - - -@dataclass(frozen=True) -class RuntimeTensorSchemaEntry: - name: str - dtype: str - shape: tuple[int, ...] - stride: tuple[int, ...] - element_size: int - storage_offset: int - - -def collect_runtime_tensor_schema( - tensors: Mapping[str, torch.Tensor], - *, - remove_duplicate: bool, -) -> tuple[RuntimeTensorSchemaEntry, ...]: - schema: list[RuntimeTensorSchemaEntry] = [] - seen_ptrs: set[int] = set() - for name, tensor in sorted(tensors.items()): - data_ptr = int(tensor.data_ptr()) - if remove_duplicate and data_ptr in seen_ptrs: - continue - seen_ptrs.add(data_ptr) - storage_offset = int(tensor.storage_offset()) - if storage_offset != 0: - raise ValueError( - "runtime tensor schema hash requires storage_offset == 0: " - f"{name} has storage_offset={storage_offset}" - ) - schema.append( - RuntimeTensorSchemaEntry( - name=str(name), - dtype=str(tensor.dtype), - shape=tuple(int(dim) for dim in tensor.shape), - stride=tuple(int(dim) for dim in tensor.stride()), - element_size=int(tensor.element_size()), - storage_offset=storage_offset, - ) - ) - return tuple(schema) - - -def compute_runtime_tensor_schema_hash( - 
schema: Sequence[RuntimeTensorSchemaEntry], -) -> str: - entries: list[CanonicalIndexEntry] = [] - segment_offset = 0 - for entry in sorted(schema, key=lambda item: item.name): - if int(entry.storage_offset) != 0: - raise ValueError( - "runtime tensor schema hash requires storage_offset == 0: " - f"{entry.name} has storage_offset={entry.storage_offset}" - ) - size_bytes = _schema_entry_size_bytes(entry) - entries.append( - CanonicalIndexEntry( - name=entry.name, - dtype=_torch_dtype_from_name(entry.dtype), - shape=entry.shape, - stride=entry.stride, - storage_offset=0, - segment_offset=segment_offset, - size_bytes=size_bytes, - ) - ) - segment_offset += size_bytes - return compute_serving_tensor_schema_hash( - CanonicalIndex( - entries=tuple(entries), - total_size_bytes=segment_offset, - avbs_hash="", - ) - ) - - -def logical_topology_json( - topology_ref: ServingTopologyRef, - *, - framework_payload: Mapping[str, object], -) -> str: - del topology_ref - normalized = _normalize_logical_topology_payload( - json.dumps( - dict(framework_payload), - sort_keys=True, - separators=(",", ":"), - ) - ) - if normalized is None: - raise ValueError("framework_payload must define a logical topology") - return json.dumps(normalized, sort_keys=True, separators=(",", ":")) - - -def compute_runtime_representation_contract_hash( - *, - tensor_schema_hash: str, - topology_ref: ServingTopologyRef, - member_ref: ServingBindingMemberRef, - framework_name: str, - framework_version: str, - adapter_version: str, - serving_abi_version: str, - source_identity: Mapping[str, object], -) -> str: - if not tensor_schema_hash: - raise ValueError("tensor_schema_hash must not be empty") - payload = { - "framework": { - "name": str(framework_name), - "version": str(framework_version), - "adapter_version": str(adapter_version), - "serving_abi_version": str(serving_abi_version), - }, - "topology_ref": _stable_payload(topology_ref.model_dump(mode="python")), - "member_ref": 
_stable_payload(member_ref.model_dump(mode="python")), - "source_identity": _stable_payload(dict(source_identity)), - "tensor_schema_hash": str(tensor_schema_hash), - } - return _hash_versioned_payload_to_multihash( - "tensorcast.representation.runtime_contract.v1", - payload, - ) - - -def _schema_entry_size_bytes(entry: RuntimeTensorSchemaEntry) -> int: - elements = 1 - for dim in entry.shape: - elements *= int(dim) - return int(elements * entry.element_size) - - -def _torch_dtype_from_name(dtype_name: str) -> torch.dtype: - normalized = dtype_name.removeprefix("torch.") - dtype = getattr(torch, normalized, None) - if not isinstance(dtype, torch.dtype): - raise ValueError(f"unsupported runtime tensor dtype: {dtype_name}") - return dtype - - -def _stable_payload(value: object) -> object: - if isinstance(value, Mapping): - return { - str(key): _stable_payload(value[key]) - for key in sorted(value, key=lambda item: str(item)) - if value[key] is not None - } - if isinstance(value, (list, tuple)): - return [_stable_payload(item) for item in value] - if isinstance(value, (str, int, float, bool)) or value is None: - return value - return str(value) - - -__all__ = [ - "RuntimeTensorSchemaEntry", - "collect_runtime_tensor_schema", - "compute_runtime_representation_contract_hash", - "compute_runtime_tensor_schema_hash", - "logical_topology_json", -] diff --git a/tensorcast/serving/policy.py b/tensorcast/serving/policy.py deleted file mode 100644 index 91e1b7ff..00000000 --- a/tensorcast/serving/policy.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
- -"""Serving artifact locator and runtime policy schema.""" - -from __future__ import annotations - -from collections.abc import Mapping -from typing import Any -from urllib.parse import quote - -from pydantic import BaseModel, ConfigDict, field_validator, model_validator - -import tensorcast as tc - -_ARTIFACT_LOCATOR_KINDS = {"version_key", "artifact_ref", "ranked_version_key"} -_POLICY_MODES = {"from_manifest", "pinned"} -SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION = 1 -SERVING_POLICY_SCHEMA_VERSION = 1 -RANKED_VERSION_KEY_MEMBER_SEGMENT = "members" - - -def _normalize_optional_text(value: Any) -> str | None: - if value is None: - return None - normalized = str(value).strip() - return normalized or None - - -def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: - normalized = str(value).strip().lower() - if normalized not in allowed: - raise ValueError( - f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" - ) - return normalized - - -def _member_id_from_ref(member: Any) -> str: - if member is None: - raise ValueError( - "ranked_version_key artifact locator resolution requires a serving member" - ) - if isinstance(member, Mapping): - member_id = member.get("member_id") - else: - member_id = getattr(member, "member_id", None) - normalized = _normalize_optional_text(member_id) - if normalized is None: - raise ValueError( - "ranked_version_key artifact locator resolution requires member.member_id" - ) - return normalized - - -def _member_from_placement(placement: Any | None) -> Any | None: - if placement is None: - return None - if isinstance(placement, Mapping): - return placement.get("member") - return getattr(placement, "member", None) - - -def ranked_version_key_for_member(version_key: str, member: Any) -> str: - base_key = _normalize_optional_text(version_key) - if base_key is None: - raise ValueError("ranked_version_key base value is required") - member_id = quote(_member_id_from_ref(member), safe=":._-") - return 
f"{base_key.rstrip('/')}/{RANKED_VERSION_KEY_MEMBER_SEGMENT}/{member_id}" - - -class ServingArtifactLocator(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - kind: str - value: str - schema_version: int = SERVING_ARTIFACT_LOCATOR_SCHEMA_VERSION - - @field_validator("kind", mode="before") - @classmethod - def _normalize_kind(cls, value: Any) -> str: - return _normalize_enum( - value, - allowed=_ARTIFACT_LOCATOR_KINDS, - field_name="serving.artifact_locator.kind", - ) - - @field_validator("value", mode="before") - @classmethod - def _normalize_value(cls, value: Any) -> str: - normalized = _normalize_optional_text(value) - if normalized is None: - raise ValueError("serving.artifact_locator.value is required") - return normalized - - @classmethod - def artifact_ref(cls, artifact_ref: str) -> ServingArtifactLocator: - return cls(kind="artifact_ref", value=str(artifact_ref)) - - @classmethod - def version_key(cls, version_key: str) -> ServingArtifactLocator: - return cls(kind="version_key", value=str(version_key)) - - @classmethod - def ranked_version_key(cls, version_key: str) -> ServingArtifactLocator: - return cls(kind="ranked_version_key", value=str(version_key)) - - def resolve_version_key( - self, - *, - member: Any | None = None, - placement: Any | None = None, - ) -> str: - if self.kind == "artifact_ref": - return self.value - if self.kind == "ranked_version_key": - if member is None: - member = _member_from_placement(placement) - return ranked_version_key_for_member(self.value, member) - return self.value - - def resolve_artifact_ref( - self, - *, - member: Any | None = None, - placement: Any | None = None, - ) -> str: - if self.kind == "artifact_ref": - return self.value - - from tensorcast.api.store import get_runtime_context - - artifact_id, _disk_path = get_runtime_context().resolve_key_mapping_cached( - key=self.resolve_version_key(member=member, placement=placement) - ) - if not artifact_id: - raise ValueError( - "serving artifact 
locator version key did not resolve to a serving " - f"artifact: {self.value!r}" - ) - return artifact_id - - -class ServingPolicy(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - mode: str = "from_manifest" - manifest_ref: str | None = None - representation_contract_hash: str | None = None - serving_build_digest: str | None = None - schema_version: int = SERVING_POLICY_SCHEMA_VERSION - - @field_validator("mode", mode="before") - @classmethod - def _normalize_mode(cls, value: Any) -> str: - if value is None: - return "from_manifest" - return _normalize_enum( - value, - allowed=_POLICY_MODES, - field_name="serving.policy.mode", - ) - - @field_validator( - "manifest_ref", - "representation_contract_hash", - "serving_build_digest", - mode="before", - ) - @classmethod - def _normalize_optional_fields(cls, value: Any) -> Any: - return _normalize_optional_text(value) - - @model_validator(mode="after") - def _validate_pinned_policy(self) -> ServingPolicy: - if self.mode != "pinned": - return self - missing = [ - name - for name, value in ( - ("manifest_ref", self.manifest_ref), - ( - "representation_contract_hash", - self.representation_contract_hash, - ), - ("serving_build_digest", self.serving_build_digest), - ) - if value is None - ] - if missing: - raise ValueError( - f"serving.policy.mode='pinned' requires {', '.join(missing)}" - ) - return self - - def to_runtime_policy(self) -> Any | None: - if self.mode == "from_manifest": - return None - return tc.ServingRuntimePolicy( - require_manifest=True, - serving_manifest_ref=self.manifest_ref, - expected_representation_contract_hash=(self.representation_contract_hash), - expected_serving_build_digest=self.serving_build_digest, - ) - - -def normalize_serving_reload_request_payload( - *, - artifact_locator: ServingArtifactLocator | Mapping[str, Any], - policy: ServingPolicy | Mapping[str, Any] | None = None, -) -> tuple[dict[str, Any], dict[str, Any]]: - """Normalize public reload locator/policy data 
to the stable wire shape.""" - - parsed_locator = ( - artifact_locator - if isinstance(artifact_locator, ServingArtifactLocator) - else ServingArtifactLocator.model_validate(artifact_locator) - ) - parsed_policy = ( - policy - if isinstance(policy, ServingPolicy) - else ServingPolicy.model_validate(policy or {"mode": "from_manifest"}) - ) - locator_payload = { - "kind": parsed_locator.kind, - "value": parsed_locator.value, - } - policy_payload: dict[str, Any] = {"mode": parsed_policy.mode} - if parsed_policy.manifest_ref is not None: - policy_payload["manifest_ref"] = parsed_policy.manifest_ref - if parsed_policy.representation_contract_hash is not None: - policy_payload["representation_contract_hash"] = ( - parsed_policy.representation_contract_hash - ) - if parsed_policy.serving_build_digest is not None: - policy_payload["serving_build_digest"] = parsed_policy.serving_build_digest - return locator_payload, policy_payload - - -def merge_serving_reload_extra_config( - extra: Mapping[str, Any] | None, - *, - artifact_locator: ServingArtifactLocator | Mapping[str, Any], - policy: ServingPolicy | Mapping[str, Any] | None = None, -) -> dict[str, Any]: - """Return model_loader_extra_config with a normalized serving reload request.""" - - normalized_locator, normalized_policy = normalize_serving_reload_request_payload( - artifact_locator=artifact_locator, - policy=policy, - ) - merged_extra = dict(extra or {}) - serving = dict(merged_extra.get("serving", {})) - serving["artifact_locator"] = normalized_locator - serving["policy"] = normalized_policy - merged_extra["serving"] = serving - return merged_extra diff --git a/tensorcast/serving/runtime.py b/tensorcast/serving/runtime.py deleted file mode 100644 index ef865e12..00000000 --- a/tensorcast/serving/runtime.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
- -"""Framework-facing TensorCast serving runtime API.""" - -from __future__ import annotations - -from tensorcast.serving._runtime_impl.lifecycle import ServingRuntimeSession -from tensorcast.serving.config import ( - ArtifactBindStartPlan, - ReplicaPublicationPolicy, - RetainedBindingAcquireStartPlan, - ServingConfig, - ServingStartPlan, - ServingStartPlanError, - SourceBootstrapToBindingStartPlan, - plan_serving_start, -) -from tensorcast.serving.errors import ( - AdmissionRejectedError, - ArtifactLocatorResolutionError, - AttachFinalizeError, - AuthorityValidationError, - CapabilityMissingError, - ConfigConflictError, - OwnershipTransferError, - PlacementAdmissionError, - PolicyMismatchError, - PublicationRequiredError, - ReplicaPublicationError, - RuntimeSwapError, - SchemaMismatchError, - ServingIntegrationError, - SourceProviderError, - TensorCastServingRuntimeError, -) -from tensorcast.serving.hosts import SourceSelector -from tensorcast.serving.policy import ( - ServingArtifactLocator, - ServingPolicy, - merge_serving_reload_extra_config, - normalize_serving_reload_request_payload, -) -from tensorcast.serving.runtime_attachment import RuntimeAttachment -from tensorcast.serving.runtime_config import ( - DEFAULT_RUNTIME_PROFILE, - RuntimeConfigProfile, - RuntimeDaemonSettings, - RuntimeGlobalStoreSettings, - RuntimeSettings, - resolve_runtime_config_profile, -) -from tensorcast.serving.runtime_intent import ( - BootstrapPolicy, - ExistingServingArtifact, - LocalSourceBootstrap, - RequestContext, - RetainedBindingAcquire, -) -from tensorcast.serving.runtime_view import RuntimeWorkerView - -__all__ = [ - "AdmissionRejectedError", - "ArtifactBindStartPlan", - "ArtifactLocatorResolutionError", - "AttachFinalizeError", - "AuthorityValidationError", - "BootstrapPolicy", - "CapabilityMissingError", - "ConfigConflictError", - "DEFAULT_RUNTIME_PROFILE", - "ExistingServingArtifact", - "LocalSourceBootstrap", - "OwnershipTransferError", - "PlacementAdmissionError", - 
"PolicyMismatchError", - "PublicationRequiredError", - "ReplicaPublicationError", - "ReplicaPublicationPolicy", - "RequestContext", - "RetainedBindingAcquire", - "RetainedBindingAcquireStartPlan", - "RuntimeAttachment", - "RuntimeConfigProfile", - "RuntimeDaemonSettings", - "RuntimeGlobalStoreSettings", - "RuntimeSettings", - "RuntimeSwapError", - "RuntimeWorkerView", - "SchemaMismatchError", - "ServingArtifactLocator", - "ServingConfig", - "ServingIntegrationError", - "ServingPolicy", - "ServingRuntimeSession", - "ServingStartPlan", - "ServingStartPlanError", - "SourceBootstrapToBindingStartPlan", - "SourceProviderError", - "SourceSelector", - "TensorCastServingRuntimeError", - "merge_serving_reload_extra_config", - "normalize_serving_reload_request_payload", - "plan_serving_start", - "resolve_runtime_config_profile", -] diff --git a/tensorcast/serving/runtime_config.py b/tensorcast/serving/runtime_config.py deleted file mode 100644 index eb85fc3e..00000000 --- a/tensorcast/serving/runtime_config.py +++ /dev/null @@ -1,297 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
- -"""Runtime startup settings for TensorCast serving artifact consumers.""" - -from __future__ import annotations - -import importlib.resources -import re -from dataclasses import dataclass -from pathlib import Path -from threading import Lock -from typing import Any - -from pydantic import BaseModel, ConfigDict, field_validator - -import tensorcast as tc - -_INIT_LOCK = Lock() -_INIT_KWARGS: dict[str, Any] | None = None -_DEFAULT_GLOBAL_STORE_ADDRESS = "127.0.0.1:50051" -DEFAULT_RUNTIME_PROFILE = "serving_single_node" - -_RUNTIME_MODES = {"auto", "connect", "create"} -_GLOBAL_STORE_MODES = {"auto", "connect", "start", "none"} -_PROFILE_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_.-]+$") - - -def _normalize_optional_text(value: Any) -> str | None: - if value is None: - return None - normalized = str(value).strip() - return normalized or None - - -def _normalize_enum(value: Any, *, allowed: set[str], field_name: str) -> str: - normalized = str(value).strip().lower() - if normalized not in allowed: - raise ValueError( - f"{field_name} must be one of {sorted(allowed)}, got: {value!r}" - ) - return normalized - - -def _validate_existing_file(path: str, *, field_name: str) -> str: - candidate = Path(path).expanduser() - if not candidate.is_file(): - raise ValueError(f"{field_name} must point to an existing file, got: {path!r}") - return str(candidate) - - -def _default_resource_path(package: str, name: str) -> str | None: - try: - resource = importlib.resources.files(package).joinpath(name) - except (FileNotFoundError, ModuleNotFoundError): - return None - path = Path(str(resource)) - return str(path) if path.is_file() else None - - -def _normalize_profile_name(value: Any) -> str | None: - normalized = _normalize_optional_text(value) - if normalized is None: - return None - if not _PROFILE_NAME_PATTERN.fullmatch(normalized): - raise ValueError( - "runtime.profile must contain only letters, digits, '.', '_', or '-'" - ) - return normalized - - -def 
_profile_resource_path(profile: str, filename: str) -> str: - profile_name = _normalize_profile_name(profile) - if profile_name is None: - raise ValueError("runtime.profile must be non-empty") - try: - resource = ( - importlib.resources.files("tensorcast") - .joinpath("config") - .joinpath("profiles") - .joinpath(profile_name) - .joinpath(filename) - ) - except (FileNotFoundError, ModuleNotFoundError) as exc: - raise ValueError( - f"Unknown TensorCast runtime config profile: {profile_name!r}" - ) from exc - path = Path(str(resource)) - if not path.is_file(): - raise ValueError(f"Unknown TensorCast runtime config profile: {profile_name!r}") - return str(path) - - -@dataclass(frozen=True) -class RuntimeConfigProfile: - name: str - daemon_config_path: str - global_store_config_path: str - - -def resolve_runtime_config_profile(profile: str) -> RuntimeConfigProfile: - profile_name = _normalize_profile_name(profile) - if profile_name is None: - raise ValueError("runtime.profile must be non-empty") - return RuntimeConfigProfile( - name=profile_name, - daemon_config_path=_profile_resource_path( - profile_name, "store_daemon_config.yaml" - ), - global_store_config_path=_profile_resource_path( - profile_name, "global_store_config.yaml" - ), - ) - - -class RuntimeDaemonSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - address: str | None = None - config_path: str | None = None - show_logs: bool = False - - @field_validator("address", "config_path", mode="before") - @classmethod - def _normalize_optional_fields(cls, value: Any) -> Any: - return _normalize_optional_text(value) - - -class RuntimeGlobalStoreSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - mode: str = "auto" - address: str | None = None - config_path: str | None = None - - @field_validator("mode", mode="before") - @classmethod - def _normalize_mode(cls, value: Any) -> str: - if value is None: - return "auto" - return _normalize_enum( - value, - 
allowed=_GLOBAL_STORE_MODES, - field_name="runtime.global_store.mode", - ) - - @field_validator("address", "config_path", mode="before") - @classmethod - def _normalize_optional_fields(cls, value: Any) -> Any: - return _normalize_optional_text(value) - - def resolved_mode(self, runtime_mode: str) -> str: - if self.mode != "auto": - return self.mode - if self.address is not None: - return "connect" - if self.config_path is not None: - return "start" - if runtime_mode in {"create", "auto"}: - return "start" - return "none" - - -class RuntimeSettings(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - profile: str | None = DEFAULT_RUNTIME_PROFILE - mode: str = "auto" - daemon: RuntimeDaemonSettings = RuntimeDaemonSettings() - global_store: RuntimeGlobalStoreSettings = RuntimeGlobalStoreSettings() - - @field_validator("profile", mode="before") - @classmethod - def _normalize_profile(cls, value: Any) -> str | None: - return _normalize_profile_name(value) - - @field_validator("mode", mode="before") - @classmethod - def _normalize_mode(cls, value: Any) -> str: - if value is None: - return "auto" - return _normalize_enum( - value, - allowed=_RUNTIME_MODES, - field_name="runtime.mode", - ) - - @staticmethod - def _default_daemon_config_path() -> str | None: - return _default_resource_path("tensorcast", "daemon_config.yaml") - - @staticmethod - def _default_global_store_config_path() -> str | None: - return _default_resource_path("tensorcast", "global_store_config.yaml") - - def to_init_kwargs( - self, - *, - default_daemon_config_path: str | None = None, - default_global_store_config_path: str | None = None, - ) -> dict[str, Any]: - profile = ( - resolve_runtime_config_profile(self.profile) - if self.profile is not None - else None - ) - kwargs: dict[str, Any] = { - "mode": self.mode, - "show_daemon_logs": self.daemon.show_logs, - } - if self.daemon.address is not None: - kwargs["address"] = self.daemon.address - - daemon_config_path = 
self.daemon.config_path - if daemon_config_path is None and self.mode in {"create", "auto"}: - daemon_config_path = ( - profile.daemon_config_path - if profile is not None - else default_daemon_config_path or self._default_daemon_config_path() - ) - if daemon_config_path is not None: - kwargs["daemon_config_path"] = _validate_existing_file( - daemon_config_path, - field_name="runtime.daemon.config_path", - ) - elif self.mode in {"create", "auto"}: - raise ValueError( - "runtime.mode requires a daemon config file for create/auto; " - "set runtime.profile or runtime.daemon.config_path" - ) - - global_store_mode = self.global_store.resolved_mode(self.mode) - if global_store_mode != "none": - kwargs["global_store_mode"] = global_store_mode - if global_store_mode == "connect": - kwargs["global_store_address"] = ( - self.global_store.address or _DEFAULT_GLOBAL_STORE_ADDRESS - ) - elif global_store_mode == "start": - global_store_config_path = self.global_store.config_path - if global_store_config_path is None: - global_store_config_path = ( - profile.global_store_config_path - if profile is not None - else default_global_store_config_path - or self._default_global_store_config_path() - ) - if global_store_config_path is not None: - kwargs["global_store_config_path"] = _validate_existing_file( - global_store_config_path, - field_name="runtime.global_store.config_path", - ) - else: - raise ValueError( - "runtime.global_store.mode='start' requires a Global " - "Store config file; set runtime.profile or " - "runtime.global_store.config_path" - ) - - return kwargs - - def ensure_initialized( - self, - *, - default_daemon_config_path: str | None = None, - default_global_store_config_path: str | None = None, - ) -> None: - init_kwargs = self.to_init_kwargs( - default_daemon_config_path=default_daemon_config_path, - default_global_store_config_path=default_global_store_config_path, - ) - with _INIT_LOCK: - global _INIT_KWARGS - if tc.is_initialized(): - if _INIT_KWARGS is None: 
- raise RuntimeError( - "TensorCast runtime was already initialized outside " - "tensorcast.serving.RuntimeSettings." - ) - if init_kwargs != _INIT_KWARGS: - raise RuntimeError( - "TensorCast runtime already initialized with different " - "settings. Existing=" - f"{_INIT_KWARGS}, requested={init_kwargs}" - ) - return - tc.init(**init_kwargs) - _INIT_KWARGS = dict(init_kwargs) - - -__all__ = [ - "DEFAULT_RUNTIME_PROFILE", - "RuntimeConfigProfile", - "RuntimeDaemonSettings", - "RuntimeGlobalStoreSettings", - "RuntimeSettings", - "resolve_runtime_config_profile", -] diff --git a/tensorcast/serving/runtime_contract.py b/tensorcast/serving/runtime_contract.py deleted file mode 100644 index c8dd96e4..00000000 --- a/tensorcast/serving/runtime_contract.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. - -"""Source-bound serving runtime contract readiness helpers.""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any, Callable - -import tensorcast as tc -from tensorcast.types import SourceBoundCapability - -MIN_SOURCE_BOUND_CONTRACT_VERSION = 4 -SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4 = "collective_first_v4" -REQUIRED_SOURCE_BOUND_CAPABILITIES = ( - SourceBoundCapability.FIRST_CLASS_COLLECTIVE_INGRESS, - SourceBoundCapability.TYPED_EXECUTION_DIAGNOSTICS, - SourceBoundCapability.SINGLE_MINT_BINDING_CLOSEOUT, -) - - -@dataclass(frozen=True) -class SourceBoundContractState: - server_config_present: bool - source_bound_contract_version: int - source_bound_capability_flags: int - source_bound_capability_names: tuple[str, ...] 
- source_bound_contract_ready: bool - - @classmethod - def unavailable(cls) -> SourceBoundContractState: - return cls( - server_config_present=False, - source_bound_contract_version=0, - source_bound_capability_flags=0, - source_bound_capability_names=(), - source_bound_contract_ready=False, - ) - - @classmethod - def from_server_config( - cls, - server_config: Any | None, - ) -> SourceBoundContractState: - if server_config is None: - return cls.unavailable() - flags = int(getattr(server_config, "source_bound_capability_flags", 0) or 0) - version = int(getattr(server_config, "source_bound_contract_version", 0) or 0) - capability_names = tuple( - str(capability.name) - for capability in SourceBoundCapability - if flags & int(capability) - ) - contract_ready = version >= MIN_SOURCE_BOUND_CONTRACT_VERSION and all( - flags & int(capability) for capability in REQUIRED_SOURCE_BOUND_CAPABILITIES - ) - return cls( - server_config_present=True, - source_bound_contract_version=version, - source_bound_capability_flags=flags, - source_bound_capability_names=capability_names, - source_bound_contract_ready=contract_ready, - ) - - -def read_source_bound_contract_state( - *, - store_fn: Callable[[], Any] | None = None, -) -> SourceBoundContractState: - try: - store = (store_fn or tc.store)() - capabilities = store.capabilities - server_config = getattr(capabilities, "server_config", None) - except Exception: - return SourceBoundContractState.unavailable() - return SourceBoundContractState.from_server_config(server_config) - - -def source_bound_contract_profile_fields( - state: SourceBoundContractState, - path: str, -) -> dict[str, object]: - return { - "source_bound_contract_version": int(state.source_bound_contract_version), - "source_bound_capability_flags": list(state.source_bound_capability_names), - "source_bound_contract_ready": bool(state.source_bound_contract_ready), - "source_bound_contract_path": path, - } - - -__all__ = [ - "MIN_SOURCE_BOUND_CONTRACT_VERSION", - 
"REQUIRED_SOURCE_BOUND_CAPABILITIES", - "SOURCE_BOUND_CONTRACT_PATH_COLLECTIVE_FIRST_V4", - "SourceBoundContractState", - "read_source_bound_contract_state", - "source_bound_contract_profile_fields", -] diff --git a/tensorcast/serving/runtime_intent.py b/tensorcast/serving/runtime_intent.py deleted file mode 100644 index 5e6c56c1..00000000 --- a/tensorcast/serving/runtime_intent.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. - -"""Serving runtime intent DTOs.""" - -from __future__ import annotations - -from collections.abc import Mapping -from dataclasses import dataclass, field -from typing import Any - -from tensorcast.serving.errors import AuthorityValidationError -from tensorcast.serving.hosts import RecipeCachePolicy, SourceSelector -from tensorcast.serving.policy import ServingArtifactLocator, ServingPolicy -from tensorcast.serving.retained_binding import ParsedRetainedServingBindingAuthority - - -@dataclass(frozen=True) -class BootstrapPolicy: - fields: Mapping[str, object] = field(default_factory=dict) - - -class ServingIntent: - """Marker base class for serving lifecycle intent DTOs.""" - - -@dataclass(frozen=True) -class ExistingServingArtifact(ServingIntent): - artifact_locator: ServingArtifactLocator | object - policy: ServingPolicy | object | None = None - - -@dataclass(frozen=True) -class LocalSourceBootstrap(ServingIntent): - source_selector: SourceSelector - bootstrap_policy: Any - cache_policy: RecipeCachePolicy | None = None - - -@dataclass(frozen=True) -class RetainedBindingAcquire(ServingIntent): - authority: ParsedRetainedServingBindingAuthority - - def __post_init__(self) -> None: - if not isinstance(self.authority, ParsedRetainedServingBindingAuthority): - raise AuthorityValidationError( - "RetainedBindingAcquire.authority must be " - "ParsedRetainedServingBindingAuthority" - ) - - -@dataclass(frozen=True) -class RequestContext: - framework_config: object | None = None - model_config: object | None = None - 
target_device: object | None = None - timeout_s: float | None = 30.0 - - -__all__ = [ - "BootstrapPolicy", - "ExistingServingArtifact", - "LocalSourceBootstrap", - "RequestContext", - "RetainedBindingAcquire", - "ServingIntent", -] diff --git a/tensorcast/serving/session.py b/tensorcast/serving/session.py deleted file mode 100644 index e947f089..00000000 --- a/tensorcast/serving/session.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. - -"""Common serving binding session state shell.""" - -from __future__ import annotations - -from typing import Any - -from pydantic import BaseModel, ConfigDict - -from tensorcast.serving.policy import ServingArtifactLocator -from tensorcast.types import BindingValueRef - - -class ServingBindingState(BaseModel): - model_config = ConfigDict(frozen=True, extra="forbid") - - state: str - artifact_locator: ServingArtifactLocator | None = None - serving_artifact_ref: str | None = None - manifest_ref: str | None = None - representation_contract_hash: str | None = None - serving_build_digest: str | None = None - binding_value_ref: BindingValueRef | None = None - local_serving_ref: str | None = None - readiness: str | None = None - updated_at: str | None = None - - def to_response(self) -> dict[str, Any]: - return self.model_dump(mode="python") diff --git a/tensorcast/serving/testing.py b/tensorcast/serving/testing.py deleted file mode 100644 index 3124b346..00000000 --- a/tensorcast/serving/testing.py +++ /dev/null @@ -1,1115 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
-"""Reusable conformance checks for framework serving integrations.""" - -from __future__ import annotations - -from collections.abc import Iterable, Mapping -from contextlib import contextmanager -from dataclasses import dataclass, field -from types import ModuleType, SimpleNamespace -from typing import Any, cast - -import torch - -import tensorcast as tc -from tensorcast.serving._runtime_impl import lifecycle as _integration -from tensorcast.serving.retained_binding import ( - ParsedRetainedServingBindingAuthority, - RetainedServingBindingExpectedDigests, -) - - -@dataclass(frozen=True) -class ConformanceResult: - """Result from a lightweight serving runtime conformance check.""" - - checks: Mapping[str, bool] = field(default_factory=dict) - messages: Mapping[str, str] = field(default_factory=dict) - level: str | None = None - - @property - def failed_checks(self) -> tuple[str, ...]: - return tuple(name for name, passed in self.checks.items() if not passed) - - def failure_summary(self) -> str: - failed = self.failed_checks - if not failed: - return "TensorCast serving conformance checks passed" - lines = [ - "TensorCast serving conformance checks failed" - + (f" for {self.level}" if self.level else "") - + ":" - ] - for name in failed: - message = self.messages.get(name, "No remediation hint available") - lines.append(f"- {name}: {message}") - return "\n".join(lines) - - def assert_passed(self) -> None: - if self.failed_checks: - raise AssertionError(self.failure_summary()) - - -def _result( - *, - level: str, - checks: Mapping[str, bool], - messages: Mapping[str, str], -) -> ConformanceResult: - result = ConformanceResult(checks=checks, messages=messages, level=level) - result.assert_passed() - return result - - -_PUBLIC_BOUNDARY_MESSAGES = { - "has_session": ( - "Expose ServingRuntimeSession from tensorcast.serving.runtime; Level 1 " - "frameworks should not construct lower-level lifecycle helpers." 
- ), - "has_attachment": ( - "Expose RuntimeAttachment as the framework-held lifecycle token." - ), - "has_request_context": ( - "Expose RequestContext so framework facts enter lifecycle calls through " - "one typed context object." - ), - "hides_admin_local_bootstrap": ( - "Keep admin/local-bootstrap override DTOs out of the framework runtime " - "module; route them through admin/offline surfaces." - ), - "hides_low_level_bind": ( - "Do not expose bind/swap/restore helpers from the runtime module; " - "frameworks should call ServingRuntimeSession.start/reload." - ), - "hides_projection_dtos": ( - "Runtime endpoint projection DTOs live in tensorcast.serving.runtime_view." - ), - "hides_state_helpers": ( - "Model attribute helpers live in tensorcast.serving.state." - ), -} - -_FRAMEWORK_ISOLATION_MESSAGES = { - "no_vllm_imports": ( - "Reference and conformance frameworks must not import vLLM. Move any " - "needed generic fact extraction into TensorCast hosts or testing helpers." - ), - "no_internal_runtime_imports": ( - "Framework examples should not import TensorCast private/internal " - "runtime modules." 
- ), -} - - -def assert_public_runtime_boundary(runtime_module: ModuleType) -> ConformanceResult: - """Check that runtime imports expose framework APIs, not admin helpers.""" - - public_names = set(getattr(runtime_module, "__all__", ())) - checks = { - "has_session": "ServingRuntimeSession" in public_names, - "has_attachment": "RuntimeAttachment" in public_names, - "has_request_context": "RequestContext" in public_names, - "hides_admin_local_bootstrap": "AdminLocalSourceBootstrap" not in public_names - and "_AdminLocalSourceBootstrap" not in public_names, - "hides_low_level_bind": "bind_serving_artifact" not in public_names - and "swap_serving_artifact" not in public_names - and "restore_retained_binding" not in public_names, - "hides_projection_dtos": { - "PublishedReplicaProjection", - "ReloadResponseProjection", - "RuntimeEndpointProjection", - "SourceSelectionProjection", - "WeightVersionProjection", - }.isdisjoint(public_names), - "hides_state_helpers": { - "ModelAttributeRuntimeState", - "RuntimeAttachmentRecord", - "RuntimeAttachmentStore", - }.isdisjoint(public_names), - } - return _result( - level="public-runtime-boundary", - checks=checks, - messages=_PUBLIC_BOUNDARY_MESSAGES, - ) - - -def assert_framework_isolation(module_names: Iterable[str]) -> ConformanceResult: - """Check that a fake/reference framework avoids vLLM imports.""" - - names = tuple(str(name) for name in module_names) - checks = { - "no_vllm_imports": not any( - name == "vllm" or name.startswith("vllm.") for name in names - ), - "no_internal_runtime_imports": not any( - name.startswith("tensorcast.serving.internal") for name in names - ), - } - return _result( - level="framework-isolation", - checks=checks, - messages=_FRAMEWORK_ISOLATION_MESSAGES, - ) - - -class FakeArtifactView: - def __init__(self, names: Iterable[str] = ()) -> None: - self.names = tuple(names) - - def bind(self, **kwargs: Any) -> "FakeBinding": - binding = FakeBinding() - binding.names = self.names - 
binding.bind_kwargs = kwargs - return binding - - -class FakeArtifact: - def subset(self, names: Iterable[str]) -> FakeArtifactView: - return FakeArtifactView(names) - - -class FakeBinding: - def __init__(self) -> None: - self.tensors = {"w": torch.ones((1,), dtype=torch.float16)} - self.binding_layout_id = "layout-1" - self.names: tuple[str, ...] = () - self.bind_kwargs: dict[str, Any] = {} - self.swapped: tuple[object, dict[str, Any]] | None = None - self.closed = False - - def swap(self, artifact: object, **kwargs: Any) -> "FakeBinding": - self.swapped = (artifact, kwargs) - self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)} - return self - - def close(self) -> None: - self.closed = True - - -class FakeRestoredRetainedBinding: - def __init__(self) -> None: - self.tensors = {"w": torch.ones((1,), dtype=torch.float16)} - self.binding_layout_id = "layout-1" - self.binding_value_ref = SimpleNamespace( - binding_id="binding-1", - binding_layout_id="layout-1", - binding_value_id="value-1", - seal_generation=1, - ) - self.reservation_bytes = 4096 - self.closed = False - self.transferred = False - - def transfer_to_runtime(self) -> object: - self.transferred = True - return SimpleNamespace(close=lambda: None) - - def close(self) -> None: - self.closed = True - - -def _retained_authority(runtime_module: ModuleType) -> object: - member = tc.ServingBindingMemberRef( - member_id="member-0", - member_index=0, - member_count=1, - group_id="group-1", - ) - binding_ref = tc.BindingValueRef( - binding_id="binding-1", - binding_layout_id="layout-1", - binding_value_id="value-1", - seal_generation=1, - ) - capability = tc.BindingReservationCapability( - capability_id="capability-1", - binding_value_ref=binding_ref, - daemon_id="daemon-1", - daemon_session_id="session-1", - device_uuid="gpu-0", - member=member, - reservation_bytes=4096, - scope_digest="scope-1", - ) - return ParsedRetainedServingBindingAuthority( - group_id="group-1", - 
local_serving_ref="binding-local:fake", - binding_value_ref=binding_ref, - reservation_capability=capability, - daemon_id="daemon-1", - daemon_session_id="session-1", - device_uuid="gpu-0", - member=member, - reservation_bytes=4096, - expected=RetainedServingBindingExpectedDigests( - target_layout_hash="layout-hash", - tensor_schema_hash="fake-schema", - serving_build_digest="build-digest", - resolved_spec_digest="spec-digest", - ), - readiness="serving_local_ready", - verification_state="local_only", - ) - - -class FakeRuntimeModel: - def __init__(self) -> None: - self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")} - - -class FakeFrameworkHost: - def identity(self, model_config: object) -> _integration.FrameworkIdentity: - del model_config - return _integration.FrameworkIdentity( - framework_name="fakefw", - framework_version="fakefw-v1", - adapter_version="adapter-v1", - serving_abi_version="abi-v1", - ) - - def prepare_model_construction( - self, - framework_config: object | None, - model_config: object | None, - ) -> None: - del framework_config, model_config - - def build_meta_model( - self, - framework_config: object | None, - model_config: object | None, - ) -> FakeRuntimeModel: - del framework_config, model_config - return FakeRuntimeModel() - - def build_runtime_model( - self, - framework_config: object | None, - model_config: object | None, - target_device: object | None, - ) -> FakeRuntimeModel: - del framework_config, model_config, target_device - return FakeRuntimeModel() - - def assert_model_ready_for_runtime_binding( - self, - model: FakeRuntimeModel, - *, - context: object, - ) -> None: - del context - if "w" not in model.tensors: - raise AssertionError("fake model missing runtime tensor 'w'") - - def semantic_probes( - self, - model: FakeRuntimeModel, - model_config: object | None, - ) -> dict[str, object]: - del model, model_config - return {} - - -class FakePlacementHost: - def identity_facts( - self, - framework_config: 
object | None, - ) -> _integration.PlacementIdentityFacts: - del framework_config - return _integration.PlacementIdentityFacts( - tensor_parallel_rank=0, - tensor_parallel_size=1, - pipeline_parallel_rank=0, - pipeline_parallel_size=1, - data_parallel_rank=0, - data_parallel_size=1, - ) - - def admission_facts( - self, - framework_config: object | None, - ) -> _integration.PlacementAdmissionFacts: - del framework_config - return _integration.PlacementAdmissionFacts() - - def member_facts( - self, - framework_config: object | None, - ) -> _integration.PlacementMemberFacts: - del framework_config - return _integration.PlacementMemberFacts( - runtime_rank=0, - runtime_world_size=1, - member_id="member-0", - member_index=0, - member_count=1, - group_id_hint="group-1", - ) - - def execution_facts( - self, - framework_config: object | None, - ) -> _integration.MaterializationExecutionFacts: - del framework_config - return _integration.MaterializationExecutionFacts( - collective_rank=0, - collective_world_size=1, - tensor_parallel_ranks=(0,), - ) - - -class FakeTensorSurface: - def runtime_only_tensor_names(self, model: FakeRuntimeModel) -> tuple[str, ...]: - del model - return () - - def align_runtime_tensor_names( - self, - model: FakeRuntimeModel, - expected_names: Iterable[str], - ) -> int: - if set(expected_names) != set(model.tensors): - raise AssertionError("fake runtime tensor names do not match") - return 0 - - def collect_runtime_tensors( - self, - model: FakeRuntimeModel, - *, - remove_duplicate: bool = False, - ) -> dict[str, object]: - del remove_duplicate - return dict(model.tensors) - - def collect_runtime_tensor_view( - self, - tensors: Mapping[str, object], - ) -> tuple[object, ...]: - del tensors - return () - - def compute_runtime_tensor_schema_hash( - self, - tensors: Mapping[str, object], - *, - remove_duplicate: bool = False, - ) -> str: - del tensors, remove_duplicate - return "fake-schema" - - def attach_bound_tensors( - self, - model: 
FakeRuntimeModel, - tensors: Mapping[str, object], - *, - replace_meta_params: bool, - ) -> FakeRuntimeModel: - del replace_meta_params - model.tensors.update(cast(Mapping[str, torch.Tensor], tensors)) - return model - - def allocate_runtime_only_tensors( - self, - model: FakeRuntimeModel, - target_device: torch.device, - ) -> dict[str, object]: - del model, target_device - return {} - - def snapshot_tensor_invariants( - self, - tensors: Mapping[str, object], - ) -> tuple[str, ...]: - return tuple(sorted(tensors)) - - def validate_tensor_invariants( - self, - before: tuple[str, ...], - after: Mapping[str, object], - ) -> None: - if before != tuple(sorted(after)): - raise AssertionError("fake tensor invariants changed") - - -class SchemaMismatchTensorSurface(FakeTensorSurface): - def compute_runtime_tensor_schema_hash( - self, - tensors: Mapping[str, object], - *, - remove_duplicate: bool = False, - ) -> str: - del tensors, remove_duplicate - return "wrong-schema" - - -class FakeSourceHost: - def source_selector( - self, - framework_config: object | None, - model_config: object | None, - ) -> object: - del framework_config, model_config - return _integration.SourceSelector.local_path("/tmp/fakefw-model") - - def source_catalog_config( - self, - framework_config: object | None, - model_config: object | None, - ) -> None: - del framework_config, model_config - return None - - def recipe_cache_policy( - self, - framework_config: object | None, - model_config: object | None, - ) -> None: - del framework_config, model_config - return None - - -class FakeSourceCatalogProvider: - def __init__(self) -> None: - self.requests: list[object] = [] - - def build_catalog(self, request: object) -> object: - self.requests.append(request) - return SimpleNamespace( - source_artifact_ref=request.source_artifact_ref, - selected_files=(), - ) - - -class FakeServingArtifactResolver: - def resolve(self, artifact_ref: str) -> SimpleNamespace: - return SimpleNamespace( - 
artifact=FakeArtifact(), - artifact_ref=artifact_ref, - tensor_names=("w",), - manifest=SimpleNamespace( - representation_contract_hash=f"repr:{artifact_ref}", - source_artifact_ref="mi2:source", - serving_build_digest=f"build:{artifact_ref}", - ), - ) - - def cross_check( - self, - resolved_artifact: SimpleNamespace, - **kwargs: object, - ) -> SimpleNamespace: - del kwargs - return resolved_artifact - - -def build_fake_runtime_host(hosts_module: ModuleType) -> object: - """Build a minimal non-vLLM host for runtime conformance checks.""" - - return hosts_module.IntegrationHost( - framework=FakeFrameworkHost(), - placement=FakePlacementHost(), - tensor_surface=FakeTensorSurface(), - ) - - -_LEVEL1_MESSAGES = { - "direct_start": ( - "Durable serving artifact startup failed. Verify framework model " - "construction, tensor surface attach/schema behavior, placement facts, " - "and artifact resolver output." - ), - "runtime_initialized": ( - "ServingRuntimeSession.start did not initialize RuntimeSettings before " - "binding the serving artifact." - ), - "describe": ( - "ServingRuntimeSession.describe must return the typed RuntimeWorkerView " - "for the current attachment." - ), - "reload": ( - "Durable serving artifact reload failed. Level 1 reload must use a " - "typed ServingArtifactLocator and ServingPolicy." - ), - "reload_identity_from_runtime_view": ( - "Reload response identity must come from the runtime view, not from the " - "request payload." - ), - "source_capability_not_required": ( - "Level 1 direct serving artifact start/reload must not require SourceHost." - ), - "source_catalog_not_required": ( - "Level 1 direct serving artifact start/reload must not require " - "SourceCatalogProvider." - ), - "rejects_local_reload_artifact_locator": ( - "Reload must reject local source selectors; local paths belong to " - "Level 2 bootstrap, not durable serving artifact reload." 
- ), - "rejects_untyped_reload_artifact_locator": ( - "Reload must reject untyped artifact locator dictionaries on the public " - "runtime path. Use ServingArtifactLocator." - ), - "rejects_untyped_reload_policy": ( - "Reload must reject untyped policy dictionaries on the public runtime " - "path. Use ServingPolicy." - ), -} - -_LEVEL2_MESSAGES = { - "missing_source_catalog_fails_closed": ( - "Local bootstrap requires a SourceCatalogProvider; TensorCast core owns " - "source identity and catalog request construction." - ), - "source_catalog_request_core_owned": ( - "Source catalog providers must receive a core-owned SourceCatalogRequest " - "with typed source selector and source artifact identity." - ), - "recipe_build_receives_core_catalog": ( - "Recipe build should consume the core source catalog, not framework " - "private catalog state." - ), - "missing_trace_capability_is_explicit": ( - "Cache-miss local bootstrap must fail with a clear missing trace/native " - "load capability instead of AttributeError or fallback loading." - ), - "local_path_is_not_reload_artifact_locator": ( - "Local path selectors must stay in bootstrap; reload accepts only durable " - "serving artifact locators." - ), -} - -_LEVEL3_MESSAGES = { - "retained_acquire_public_start": ( - "Retained binding acquire must enter through ServingRuntimeSession.start and " - "return a RuntimeAttachment with typed endpoint projection." - ), - "retained_acquire_uses_host_member": ( - "Retained acquire must validate authority member facts against the " - "framework placement host." - ), - "retained_acquire_transfers_ownership": ( - "Retained binding ownership must transfer into TensorCast runtime state " - "only after attach/finalize succeeds." - ), - "missing_authority_fails_closed": ( - "Retained binding acquire config must include typed retained authority." - ), - "authority_mismatch_fails_closed": ( - "Daemon/session/member authority mismatches must fail closed." 
- ), - "failure_cleanup_closes_untransferred_handle": ( - "Attach/finalize failure must close an untransferred retained handle." - ), - "failure_path_used_retained_restore": ( - "Retained binding failure coverage did not exercise restore ownership." - ), - "rejects_arbitrary_retained_authority": ( - "Retained acquire must reject arbitrary authority objects; use the parsed " - "retained serving binding authority." - ), -} - - -def _retained_binding_acquire_config(runtime_module: ModuleType) -> dict[str, Any]: - authority = _retained_authority(runtime_module) - return { - "retained_binding_acquire": { - "mode": "external", - "authority": { - "group_id": authority.group_id, - "member_ref": authority.member.model_dump(mode="python"), - "daemon_id": authority.daemon_id, - "daemon_session_id": authority.daemon_session_id, - "device_uuid": authority.device_uuid, - "binding_value_ref": ( - authority.binding_value_ref.model_dump(mode="python") - ), - "reservation_capability": ( - authority.reservation_capability.model_dump(mode="python") - ), - "local_serving_ref": authority.local_serving_ref, - "readiness": authority.readiness, - "verification_state": authority.verification_state, - "serving_artifact_id": authority.serving_artifact_id, - "trusted_reservation_bytes": authority.reservation_bytes, - "expected": { - "target_layout_hash": authority.expected.target_layout_hash, - "tensor_schema_hash": authority.expected.tensor_schema_hash, - "serving_build_digest": authority.expected.serving_build_digest, - "resolved_spec_digest": authority.expected.resolved_spec_digest, - }, - }, - }, - } - - -@contextmanager -def _patched_fake_runtime(runtime_module: ModuleType): - integration_module = cast(Any, _integration) - original_ensure_initialized = runtime_module.RuntimeSettings.ensure_initialized - original_contract_reader = integration_module.read_source_bound_contract_state - original_materialization_options = ( - integration_module.ServingIntegration.build_materialization_options 
- ) - initialized: list[object] = [] - - def ensure_initialized(self) -> None: - initialized.append(self) - - runtime_module.RuntimeSettings.ensure_initialized = ensure_initialized - integration_module.read_source_bound_contract_state = lambda: SimpleNamespace( - source_bound_contract_ready=True, - source_bound_contract_version=4, - source_bound_capability_names=("collective",), - ) - integration_module.ServingIntegration.build_materialization_options = ( - lambda self, **kwargs: ("fake-materialization-options", kwargs) - ) - try: - yield initialized - finally: - runtime_module.RuntimeSettings.ensure_initialized = original_ensure_initialized - integration_module.read_source_bound_contract_state = original_contract_reader - integration_module.ServingIntegration.build_materialization_options = ( - original_materialization_options - ) - - -def assert_level1_runtime_conformance( - runtime_module: ModuleType, - hosts_module: ModuleType, - *, - host: object | None = None, -) -> ConformanceResult: - """Run Level 1 durable serving artifact runtime conformance. - - The suite intentionally uses only ``tensorcast.serving.runtime`` and - ``tensorcast.serving.hosts`` plus this testing module's fake host fixtures. - It covers direct artifact start, reload, describe, capability optionality, - strict public DTO rejection and no-vLLM-import contracts. It does not - instantiate local bootstrap or retained binding acquire intent DTOs. 
- """ - - checks: dict[str, bool] = {} - assert_public_runtime_boundary(runtime_module) - assert_framework_isolation( - (runtime_module.__name__, hosts_module.__name__, __name__) - ) - - with _patched_fake_runtime(runtime_module) as initialized: - host = host if host is not None else build_fake_runtime_host(hosts_module) - session = runtime_module.ServingRuntimeSession.from_config( - { - "bootstrap": { - "mode": "disabled", - }, - "serving": { - "artifact_locator": { - "kind": "artifact_ref", - "value": "mi2:serving", - }, - }, - }, - host=host, - resolver=FakeServingArtifactResolver(), - ) - attachment = session.start( - runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - direct_payload = attachment.view.endpoint.to_weight_version_payload() - checks["direct_start"] = ( - direct_payload.get("serving_artifact_ref") == "mi2:serving" - and direct_payload.get("source_artifact_ref") == "mi2:source" - ) - checks["runtime_initialized"] = bool(initialized) - - described = session.describe(attachment) - checks["describe"] = ( - described.endpoint.to_weight_version_payload().get("serving_artifact_ref") - == "mi2:serving" - ) - - reloaded = session.reload( - current_attachment=attachment, - artifact_locator=runtime_module.ServingArtifactLocator.artifact_ref( - "mi2:serving-next" - ), - policy=runtime_module.ServingPolicy(), - context=runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - ), - model=attachment.model, - ) - reload_response = reloaded.view.endpoint.to_reload_response_payload() - checks["reload"] = ( - reload_response.get("serving_artifact_ref") == "mi2:serving-next" - ) - checks["reload_identity_from_runtime_view"] = ( - reloaded.state.runtime_view.serving_artifact_ref - == reload_response.get("serving_artifact_ref") - ) - checks["source_capability_not_required"] = True - 
checks["source_catalog_not_required"] = True - - try: - session.reload( - current_attachment=reloaded, - artifact_locator=runtime_module.SourceSelector.local_path("/tmp/model"), - policy=runtime_module.ServingPolicy(), - context=runtime_module.RequestContext(), - ) - except _integration.ConfigConflictError: - checks["rejects_local_reload_artifact_locator"] = True - else: - checks["rejects_local_reload_artifact_locator"] = False - - try: - session.reload( - current_attachment=reloaded, - artifact_locator={ - "kind": "artifact_ref", - "value": "mi2:serving-next", - }, - policy=runtime_module.ServingPolicy(), - context=runtime_module.RequestContext(), - ) - except _integration.ConfigConflictError: - checks["rejects_untyped_reload_artifact_locator"] = True - else: - checks["rejects_untyped_reload_artifact_locator"] = False - - try: - session.reload( - current_attachment=reloaded, - artifact_locator=runtime_module.ServingArtifactLocator.artifact_ref( - "mi2:serving-next" - ), - policy={"mode": "from_manifest"}, - context=runtime_module.RequestContext(), - ) - except _integration.ConfigConflictError: - checks["rejects_untyped_reload_policy"] = True - else: - checks["rejects_untyped_reload_policy"] = False - - return _result(level="level1-runtime", checks=checks, messages=_LEVEL1_MESSAGES) - - -def assert_level2_local_bootstrap_conformance( - runtime_module: ModuleType, - hosts_module: ModuleType, -) -> ConformanceResult: - """Run Level 2 local source bootstrap planning conformance.""" - - checks: dict[str, bool] = {} - with _patched_fake_runtime(runtime_module): - integration_module = cast(Any, _integration) - host_without_catalog = hosts_module.IntegrationHost( - framework=FakeFrameworkHost(), - placement=FakePlacementHost(), - tensor_surface=FakeTensorSurface(), - source=FakeSourceHost(), - ) - session = runtime_module.ServingRuntimeSession.from_config( - { - "bootstrap": { - "mode": "required", - }, - }, - host=host_without_catalog, - ) - 
original_resolve_source_subject = ( - integration_module.ServingIntegration.resolve_source_subject - ) - - def fake_resolve_source_subject(self, selector, **kwargs): - del self, selector, kwargs - return _integration.SourceSubject( - artifact_ref="mi2:source", - subject=SimpleNamespace(), - source_kind="fake", - metadata_fingerprint="meta", - ) - - integration_module.ServingIntegration.resolve_source_subject = ( - fake_resolve_source_subject - ) - try: - try: - session.start( - runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - except _integration.CapabilityMissingError as exc: - checks["missing_source_catalog_fails_closed"] = "source_catalog" in str( - exc - ) - else: - checks["missing_source_catalog_fails_closed"] = False - - catalog_provider = FakeSourceCatalogProvider() - host_with_catalog = hosts_module.IntegrationHost( - framework=FakeFrameworkHost(), - placement=FakePlacementHost(), - tensor_surface=FakeTensorSurface(), - source=FakeSourceHost(), - source_catalog=catalog_provider, - ) - session_with_catalog = runtime_module.ServingRuntimeSession.from_config( - { - "bootstrap": { - "mode": "required", - }, - }, - host=host_with_catalog, - ) - original_build_recipe = integration_module.RecipeBuildSession.build_recipe - captured_builds: list[Mapping[str, object]] = [] - - def fake_build_recipe(self, **kwargs): - del self - captured_builds.append(kwargs) - kwargs["framework_adapter"].trace_model_load( - FakeRuntimeModel(), - ["w"], - {"w": SimpleNamespace(name="w")}, - ) - - integration_module.RecipeBuildSession.build_recipe = fake_build_recipe - try: - try: - session_with_catalog.start( - runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - except _integration.CapabilityMissingError as exc: - 
checks["missing_trace_capability_is_explicit"] = ( - "RecipeTraceHost" in str(exc) or "trace_model_load" in str(exc) - ) - else: - checks["missing_trace_capability_is_explicit"] = False - finally: - integration_module.RecipeBuildSession.build_recipe = ( - original_build_recipe - ) - finally: - integration_module.ServingIntegration.resolve_source_subject = ( - original_resolve_source_subject - ) - - catalog_request = ( - catalog_provider.requests[0] if catalog_provider.requests else None - ) - checks["source_catalog_request_core_owned"] = ( - catalog_request is not None - and getattr(catalog_request, "source_artifact_ref", None) == "mi2:source" - and isinstance( - getattr(catalog_request, "source_selector", None), - _integration.SourceSelector, - ) - ) - checks["recipe_build_receives_core_catalog"] = ( - bool(captured_builds) - and captured_builds[0].get("source_catalog") is not None - ) - - attachment = runtime_module.RuntimeAttachment( - model=object(), - state=_integration.RuntimeBindingState( - runtime_view=_integration.RuntimeBindingView() - ), - view=runtime_module.RuntimeWorkerView.from_runtime_view( - _integration.RuntimeBindingView() - ), - ) - try: - session.reload( - current_attachment=attachment, - artifact_locator=runtime_module.SourceSelector.local_path( - "/tmp/fakefw-model" - ), - policy=runtime_module.ServingPolicy(), - context=runtime_module.RequestContext(), - ) - except _integration.ConfigConflictError: - checks["local_path_is_not_reload_artifact_locator"] = True - else: - checks["local_path_is_not_reload_artifact_locator"] = False - - return _result( - level="level2-local-bootstrap", - checks=checks, - messages=_LEVEL2_MESSAGES, - ) - - -def assert_level3_retained_binding_conformance( - runtime_module: ModuleType, - hosts_module: ModuleType, -) -> ConformanceResult: - """Run Level 3 retained binding acquire conformance.""" - - checks: dict[str, bool] = {} - with _patched_fake_runtime(runtime_module): - integration_module = cast(Any, 
_integration) - host = build_fake_runtime_host(hosts_module) - retained_calls: list[Mapping[str, object]] = [] - restored = FakeRestoredRetainedBinding() - original_restore_retained = integration_module.restore_retained_binding - - @contextmanager - def fake_restore_retained(**kwargs: object): - retained_calls.append(kwargs) - yield restored - - integration_module.restore_retained_binding = fake_restore_retained - try: - session = runtime_module.ServingRuntimeSession.from_config( - _retained_binding_acquire_config(runtime_module), - host=host, - ) - retained = session.start( - runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - finally: - integration_module.restore_retained_binding = original_restore_retained - retained_payload = retained.view.endpoint.to_weight_version_payload() - checks["retained_acquire_public_start"] = ( - retained_payload.get("local_serving_ref") == "binding-local:fake" - and retained_payload.get("binding_value_ref", {}).get("binding_value_id") - == "value-1" - ) - checks["retained_acquire_uses_host_member"] = ( - bool(retained_calls) - and getattr(retained_calls[0].get("expected_member"), "member_index", None) - == 0 - ) - checks["retained_acquire_transfers_ownership"] = restored.transferred - - try: - runtime_module.ServingConfig.from_mapping( - { - "retained_binding_acquire": { - "mode": "external", - }, - } - ) - except Exception: - checks["missing_authority_fails_closed"] = True - else: - checks["missing_authority_fails_closed"] = False - - mismatch_config = dict(_retained_binding_acquire_config(runtime_module)) - acquire = dict(mismatch_config["retained_binding_acquire"]) - authority = dict(acquire["authority"]) - capability = dict(authority["reservation_capability"]) - capability["daemon_session_id"] = "wrong-session" - authority["reservation_capability"] = capability - acquire["authority"] = authority - 
mismatch_config["retained_binding_acquire"] = acquire - try: - mismatch_session = runtime_module.ServingRuntimeSession.from_config( - mismatch_config, - host=host, - ) - mismatch_session.start( - runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - except Exception: - checks["authority_mismatch_fails_closed"] = True - else: - checks["authority_mismatch_fails_closed"] = False - - failing_host = hosts_module.IntegrationHost( - framework=FakeFrameworkHost(), - placement=FakePlacementHost(), - tensor_surface=SchemaMismatchTensorSurface(), - ) - failing_restored = FakeRestoredRetainedBinding() - failure_calls: list[Mapping[str, object]] = [] - - @contextmanager - def fake_restore_for_failure(**kwargs: object): - failure_calls.append(kwargs) - yield failing_restored - - integration_module.restore_retained_binding = fake_restore_for_failure - try: - failing_session = runtime_module.ServingRuntimeSession.from_config( - _retained_binding_acquire_config(runtime_module), - host=failing_host, - ) - try: - failing_session.start( - runtime_module.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - except _integration.SchemaMismatchError: - checks["failure_cleanup_closes_untransferred_handle"] = ( - failing_restored.closed and not failing_restored.transferred - ) - else: - checks["failure_cleanup_closes_untransferred_handle"] = False - finally: - integration_module.restore_retained_binding = original_restore_retained - - checks["failure_path_used_retained_restore"] = bool(failure_calls) - - try: - runtime_module.RetainedBindingAcquire(SimpleNamespace()) - except _integration.AuthorityValidationError: - checks["rejects_arbitrary_retained_authority"] = True - else: - checks["rejects_arbitrary_retained_authority"] = False - - return _result( - 
level="level3-retained-binding", - checks=checks, - messages=_LEVEL3_MESSAGES, - ) - - -__all__ = [ - "ConformanceResult", - "FakeArtifact", - "FakeArtifactView", - "FakeBinding", - "FakeFrameworkHost", - "FakePlacementHost", - "FakeRestoredRetainedBinding", - "FakeRuntimeModel", - "FakeServingArtifactResolver", - "FakeSourceCatalogProvider", - "FakeSourceHost", - "FakeTensorSurface", - "SchemaMismatchTensorSurface", - "assert_framework_isolation", - "assert_level1_runtime_conformance", - "assert_level2_local_bootstrap_conformance", - "assert_level3_retained_binding_conformance", - "assert_public_runtime_boundary", - "build_fake_runtime_host", -] diff --git a/tensorcast/types.py b/tensorcast/types.py index 8cef482d..4d22d17d 100644 --- a/tensorcast/types.py +++ b/tensorcast/types.py @@ -667,6 +667,9 @@ class SealAssemblyResult(BaseModel): _ASSEMBLY_CANONICAL_COVERAGE_CONTRACT = "canonical_full" SERVING_MANIFEST_TENSOR_NAME = "__tensorcast_meta__.manifest_json" SERVING_BUILD_DIGEST_VERSION = "tensorcast.serving_build_digest.v1" +# Serving-manifest names below are persisted publication/wire ABI, not a +# separate runtime source authority. Runtime-facing DTOs use artifact/runtime +# names and map to these fields only at serialization boundaries. 
def _canonical_json_bytes(payload: object) -> bytes: @@ -718,7 +721,7 @@ class FinalizeClass(str, Enum): UNKNOWN_BLOCKED = "unknown_blocked" -class ServingSupportLevel(str, Enum): +class RuntimeSupportLevel(str, Enum): BLOCKED = "blocked" SOURCE_BIND_BOOTSTRAP_ONLY = "source_bind_bootstrap_only" BUILDER_PUBLICATION_READY = "builder_publication_ready" @@ -749,30 +752,30 @@ class ServingSupportLevel(str, Enum): FinalizeClass.UNKNOWN_BLOCKED ), } -_PUBLICATION_SERVING_SUPPORT_LEVEL_TO_PROTO: dict[ - ServingSupportLevel, publication_pb2.ServingSupportLevel +_PUBLICATION_RUNTIME_SUPPORT_LEVEL_TO_PROTO: dict[ + RuntimeSupportLevel, publication_pb2.ServingSupportLevel ] = { - ServingSupportLevel.BLOCKED: publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED, - ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: ( + RuntimeSupportLevel.BLOCKED: publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED, + RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY: ( publication_pb2.SERVING_SUPPORT_LEVEL_SOURCE_BIND_BOOTSTRAP_ONLY ), - ServingSupportLevel.BUILDER_PUBLICATION_READY: ( + RuntimeSupportLevel.BUILDER_PUBLICATION_READY: ( publication_pb2.SERVING_SUPPORT_LEVEL_BUILDER_PUBLICATION_READY ), - ServingSupportLevel.RUNTIME_BIND_SWAP_READY: ( + RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY: ( publication_pb2.SERVING_SUPPORT_LEVEL_RUNTIME_BIND_SWAP_READY ), } -_PUBLICATION_SERVING_SUPPORT_LEVEL_FROM_PROTO: dict[int, ServingSupportLevel] = { - int(publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED): ServingSupportLevel.BLOCKED, +_PUBLICATION_RUNTIME_SUPPORT_LEVEL_FROM_PROTO: dict[int, RuntimeSupportLevel] = { + int(publication_pb2.SERVING_SUPPORT_LEVEL_BLOCKED): RuntimeSupportLevel.BLOCKED, int(publication_pb2.SERVING_SUPPORT_LEVEL_SOURCE_BIND_BOOTSTRAP_ONLY): ( - ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY + RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY ), int(publication_pb2.SERVING_SUPPORT_LEVEL_BUILDER_PUBLICATION_READY): ( - ServingSupportLevel.BUILDER_PUBLICATION_READY + 
RuntimeSupportLevel.BUILDER_PUBLICATION_READY ), int(publication_pb2.SERVING_SUPPORT_LEVEL_RUNTIME_BIND_SWAP_READY): ( - ServingSupportLevel.RUNTIME_BIND_SWAP_READY + RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY ), } _PUBLICATION_ASSEMBLY_TARGET_KIND_TO_PROTO: dict[ @@ -1122,7 +1125,7 @@ def from_publication_proto( ) -class ServingBuildIntent(BaseModel): +class RuntimeArtifactBuildIntent(BaseModel): model_config = ConfigDict(frozen=True) representation_contract_hash: str | None = None @@ -1134,7 +1137,7 @@ class ServingBuildIntent(BaseModel): source_artifact_ref: str | None = None @model_validator(mode="after") - def _validate_fields(self) -> "ServingBuildIntent": + def _validate_fields(self) -> "RuntimeArtifactBuildIntent": if ( self.representation_contract_hash is not None and not self.representation_contract_hash @@ -1182,7 +1185,7 @@ def to_publication_proto(self) -> publication_pb2.ServingBuildIntent: def from_publication_proto( cls, proto: publication_pb2.ServingBuildIntent, - ) -> "ServingBuildIntent": + ) -> "RuntimeArtifactBuildIntent": builder_mode = BuilderMode.PURE_TRANSFORM if int(proto.builder_mode) != int(publication_pb2.BUILDER_MODE_UNSPECIFIED): builder_mode = _PUBLICATION_BUILDER_MODE_FROM_PROTO[int(proto.builder_mode)] @@ -1202,7 +1205,7 @@ def from_publication_proto( class PureTransformPublicationSpec(BaseModel): model_config = ConfigDict(frozen=True) - build_intent: ServingBuildIntent + build_intent: RuntimeArtifactBuildIntent contract_family: AssemblyContractFamily | None = None source_version_key: str | None = None serving_version_key: str | None = None @@ -1212,7 +1215,7 @@ class PureTransformPublicationSpec(BaseModel): requirements: AssemblyRequirementSetRef | None = None readiness_policy: AssemblyReadinessPolicy | None = None structural_view_ids: tuple[str, ...] 
= () - admission_facts: ServingAdmissionFacts | None = None + admission_facts: RuntimeAdmissionFacts | None = None @model_validator(mode="after") def _validate_publication_spec(self) -> "PureTransformPublicationSpec": @@ -1262,7 +1265,9 @@ def from_proto( proto: publication_pb2.PureTransformPublicationSpec, ) -> "PureTransformPublicationSpec": return cls( - build_intent=ServingBuildIntent.from_publication_proto(proto.build_intent), + build_intent=RuntimeArtifactBuildIntent.from_publication_proto( + proto.build_intent + ), contract_family=cast( AssemblyContractFamily | None, str(proto.contract_family or "") or None, @@ -1284,18 +1289,18 @@ def from_proto( ), structural_view_ids=tuple(str(item) for item in proto.structural_view_ids), admission_facts=( - ServingAdmissionFacts.from_publication_proto(proto.admission_facts) + RuntimeAdmissionFacts.from_publication_proto(proto.admission_facts) if proto.HasField("admission_facts") else None ), ) -class ServingAdmissionFacts(BaseModel): +class RuntimeAdmissionFacts(BaseModel): model_config = ConfigDict(frozen=True) finalize_class: FinalizeClass - support_level: ServingSupportLevel + support_level: RuntimeSupportLevel topology_admission_digest: str | None = None same_binding_fast_path_validated: bool = False @@ -1307,7 +1312,7 @@ def _empty_digest_is_none(cls, value: object) -> object: return value @model_validator(mode="after") - def _validate_admission_facts(self) -> "ServingAdmissionFacts": + def _validate_admission_facts(self) -> "RuntimeAdmissionFacts": if ( self.finalize_class == FinalizeClass.REPRESENTATION_CHANGING and not self.same_binding_fast_path_validated @@ -1324,8 +1329,8 @@ def validate_for_representation_publish(self, *, builder_mode: BuilderMode) -> N "representation publish requires a non-blocked finalize_class" ) if self.support_level not in { - ServingSupportLevel.BUILDER_PUBLICATION_READY, - ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + RuntimeSupportLevel.BUILDER_PUBLICATION_READY, + 
RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, }: raise ValueError( "representation publish requires support_level to admit builder publication" @@ -1354,12 +1359,12 @@ def validate_for_representation_publish(self, *, builder_mode: BuilderMode) -> N def admits_builder_publication(self) -> bool: return self.support_level in { - ServingSupportLevel.BUILDER_PUBLICATION_READY, - ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + RuntimeSupportLevel.BUILDER_PUBLICATION_READY, + RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, } def admits_runtime_bind_swap(self) -> bool: - return self.support_level == ServingSupportLevel.RUNTIME_BIND_SWAP_READY + return self.support_level == RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY def require_runtime_bind_swap_ready(self) -> None: if not self.admits_runtime_bind_swap(): @@ -1376,7 +1381,7 @@ def require_serving_key_activation_ready(self) -> None: def to_publication_proto(self) -> publication_pb2.ServingAdmissionFacts: proto = publication_pb2.ServingAdmissionFacts( finalize_class=_PUBLICATION_FINALIZE_CLASS_TO_PROTO[self.finalize_class], - support_level=_PUBLICATION_SERVING_SUPPORT_LEVEL_TO_PROTO[ + support_level=_PUBLICATION_RUNTIME_SUPPORT_LEVEL_TO_PROTO[ self.support_level ], same_binding_fast_path_validated=bool( @@ -1391,18 +1396,18 @@ def to_publication_proto(self) -> publication_pb2.ServingAdmissionFacts: def from_publication_proto( cls, proto: publication_pb2.ServingAdmissionFacts, - ) -> "ServingAdmissionFacts": + ) -> "RuntimeAdmissionFacts": if int(proto.finalize_class) == int(publication_pb2.FINALIZE_CLASS_UNSPECIFIED): - raise ValueError("ServingAdmissionFacts.finalize_class must be specified") + raise ValueError("RuntimeAdmissionFacts.finalize_class must be specified") if int(proto.support_level) == int( publication_pb2.SERVING_SUPPORT_LEVEL_UNSPECIFIED ): - raise ValueError("ServingAdmissionFacts.support_level must be specified") + raise ValueError("RuntimeAdmissionFacts.support_level must be specified") return cls( 
finalize_class=_PUBLICATION_FINALIZE_CLASS_FROM_PROTO[ int(proto.finalize_class) ], - support_level=_PUBLICATION_SERVING_SUPPORT_LEVEL_FROM_PROTO[ + support_level=_PUBLICATION_RUNTIME_SUPPORT_LEVEL_FROM_PROTO[ int(proto.support_level) ], topology_admission_digest=( @@ -1414,7 +1419,7 @@ def from_publication_proto( ) -class ServingArtifactManifest(BaseModel): +class RuntimeArtifactManifest(BaseModel): model_config = ConfigDict(frozen=True) schema_version: int = 1 @@ -1435,7 +1440,7 @@ class ServingArtifactManifest(BaseModel): topology_admission_digest: str | None = None @model_validator(mode="after") - def _validate_manifest(self) -> "ServingArtifactManifest": + def _validate_manifest(self) -> "RuntimeArtifactManifest": if self.schema_version <= 0: raise ValueError("schema_version must be positive") if self.artifact_kind != "serving": @@ -1465,14 +1470,14 @@ def _validate_manifest(self) -> "ServingArtifactManifest": def from_build_intent( cls, *, - intent: ServingBuildIntent, + intent: RuntimeArtifactBuildIntent, representation_contract_hash: str | None = None, tensor_schema_hash: str, canonical_tensor_count: int, serving_manifest_ref: str | None = None, logical_topology_json: str | None = None, topology_admission_digest: str | None = None, - ) -> "ServingArtifactManifest": + ) -> "RuntimeArtifactManifest": resolved_representation_contract_hash = ( representation_contract_hash or intent.representation_contract_hash ) @@ -1505,7 +1510,7 @@ def to_bytes(self) -> bytes: return _canonical_json_bytes(self.model_dump(mode="json")) @classmethod - def from_bytes(cls, payload: bytes | bytearray | str) -> "ServingArtifactManifest": + def from_bytes(cls, payload: bytes | bytearray | str) -> "RuntimeArtifactManifest": raw = ( payload.decode("utf-8") if isinstance(payload, (bytes, bytearray)) @@ -1517,8 +1522,8 @@ def to_runtime_policy( self, *, require_manifest: bool = True, - ) -> "ServingRuntimePolicy": - return ServingRuntimePolicy( + ) -> "RuntimeArtifactPolicy": + return 
RuntimeArtifactPolicy( require_manifest=bool(require_manifest), serving_manifest_ref=str(self.serving_manifest_ref), expected_representation_contract_hash=str( @@ -1533,7 +1538,7 @@ def to_runtime_policy( ) -class ServingRuntimePolicy(BaseModel): +class RuntimeArtifactPolicy(BaseModel): model_config = ConfigDict(frozen=True) require_manifest: bool = True @@ -1543,7 +1548,7 @@ class ServingRuntimePolicy(BaseModel): expected_topology_admission_digest: str | None = None @model_validator(mode="after") - def _validate_policy(self) -> "ServingRuntimePolicy": + def _validate_policy(self) -> "RuntimeArtifactPolicy": if self.serving_manifest_ref is not None: parse_serving_manifest_ref(self.serving_manifest_ref) return self @@ -1578,7 +1583,7 @@ def to_proto(self) -> store_daemon_pb2.ServingArtifactRuntimePolicy: def from_proto( cls, proto: store_daemon_pb2.ServingArtifactRuntimePolicy, - ) -> "ServingRuntimePolicy": + ) -> "RuntimeArtifactPolicy": return cls( require_manifest=bool(proto.require_manifest), serving_manifest_ref=str(proto.serving_manifest_ref or "") or None, @@ -1643,25 +1648,25 @@ def from_proto( ) -ServingBindingReadiness = Literal[ - "serving_reserved", - "serving_local_ready", - "serving_published_ready", +RuntimeBindingReadiness = Literal[ + "runtime_reserved", + "runtime_local_ready", + "runtime_published_ready", ] _SERVING_READINESS_TO_PROTO: dict[ - ServingBindingReadiness, operation_pb2.ServingBindingReadiness + RuntimeBindingReadiness, operation_pb2.ServingBindingReadiness ] = { - "serving_reserved": operation_pb2.SERVING_BINDING_READINESS_RESERVED, - "serving_local_ready": operation_pb2.SERVING_BINDING_READINESS_LOCAL_READY, - "serving_published_ready": operation_pb2.SERVING_BINDING_READINESS_PUBLISHED_READY, + "runtime_reserved": operation_pb2.SERVING_BINDING_READINESS_RESERVED, + "runtime_local_ready": operation_pb2.SERVING_BINDING_READINESS_LOCAL_READY, + "runtime_published_ready": operation_pb2.SERVING_BINDING_READINESS_PUBLISHED_READY, } 
-_SERVING_READINESS_FROM_PROTO: dict[int, ServingBindingReadiness] = { +_SERVING_READINESS_FROM_PROTO: dict[int, RuntimeBindingReadiness] = { int(value): key for key, value in _SERVING_READINESS_TO_PROTO.items() } -class ServingTopologyRef(BaseModel): +class RuntimeTopologyRef(BaseModel): model_config = ConfigDict(frozen=True) schema_version: int = 1 @@ -1671,7 +1676,7 @@ class ServingTopologyRef(BaseModel): runtime_topology_diagnostics_ref: str | None = None @model_validator(mode="after") - def _validate_topology(self) -> "ServingTopologyRef": + def _validate_topology(self) -> "RuntimeTopologyRef": if int(self.schema_version) <= 0: raise ValueError("schema_version must be positive") if not self.schema_topology_digest: @@ -1696,7 +1701,7 @@ def to_proto(self) -> operation_pb2.ServingTopologyRef: @classmethod def from_proto( cls, proto: operation_pb2.ServingTopologyRef - ) -> "ServingTopologyRef": + ) -> "RuntimeTopologyRef": return cls( schema_version=int(proto.schema_version), schema_topology_digest=str(proto.schema_topology_digest), @@ -1718,7 +1723,7 @@ def from_proto( ) -class ServingBindingMemberRef(BaseModel): +class RuntimeBindingMemberRef(BaseModel): model_config = ConfigDict(frozen=True) member_id: str @@ -1727,7 +1732,7 @@ class ServingBindingMemberRef(BaseModel): group_id: str | None = None @model_validator(mode="after") - def _validate_member(self) -> "ServingBindingMemberRef": + def _validate_member(self) -> "RuntimeBindingMemberRef": if not self.member_id: raise ValueError("member_id must not be empty") if int(self.member_index) < 0: @@ -1753,7 +1758,7 @@ def to_proto(self) -> operation_pb2.ServingBindingMemberRef: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingMemberRef - ) -> "ServingBindingMemberRef": + ) -> "RuntimeBindingMemberRef": return cls( member_id=str(proto.member_id), member_index=int(proto.member_index), @@ -1795,52 +1800,52 @@ def from_proto(cls, proto: operation_pb2.BlobRef) -> "BlobRef": ) 
-ServingBindingSourceKind = Literal[ +RuntimeBindingSourceKind = Literal[ "checkpoint_artifact", - "serving_artifact", - "serving_artifact_set", + "runtime_artifact", + "runtime_artifact_set", ] -ServingBindingSourceReuseMode = Literal[ - "checkpoint_to_serving", - "serving_direct_member_copy", - "serving_transform_required", +RuntimeBindingSourceReuseMode = Literal[ + "checkpoint_to_runtime", + "runtime_direct_member_copy", + "runtime_transform_required", "unsupported", ] _SOURCE_KIND_TO_PROTO: dict[ - ServingBindingSourceKind, operation_pb2.ServingBindingSourceKind + RuntimeBindingSourceKind, operation_pb2.ServingBindingSourceKind ] = { "checkpoint_artifact": operation_pb2.SERVING_BINDING_SOURCE_KIND_CHECKPOINT_ARTIFACT, - "serving_artifact": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT, - "serving_artifact_set": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT_SET, + "runtime_artifact": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT, + "runtime_artifact_set": operation_pb2.SERVING_BINDING_SOURCE_KIND_SERVING_ARTIFACT_SET, } -_SOURCE_KIND_FROM_PROTO: dict[int, ServingBindingSourceKind] = { +_SOURCE_KIND_FROM_PROTO: dict[int, RuntimeBindingSourceKind] = { int(value): key for key, value in _SOURCE_KIND_TO_PROTO.items() } _SOURCE_REUSE_TO_PROTO: dict[ - ServingBindingSourceReuseMode, operation_pb2.ServingBindingSourceReuseMode + RuntimeBindingSourceReuseMode, operation_pb2.ServingBindingSourceReuseMode ] = { - "checkpoint_to_serving": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_CHECKPOINT_TO_SERVING, - "serving_direct_member_copy": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_DIRECT_MEMBER_COPY, - "serving_transform_required": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_TRANSFORM_REQUIRED, + "checkpoint_to_runtime": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_CHECKPOINT_TO_SERVING, + "runtime_direct_member_copy": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_DIRECT_MEMBER_COPY, + 
"runtime_transform_required": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_SERVING_TRANSFORM_REQUIRED, "unsupported": operation_pb2.SERVING_BINDING_SOURCE_REUSE_MODE_UNSUPPORTED, } -_SOURCE_REUSE_FROM_PROTO: dict[int, ServingBindingSourceReuseMode] = { +_SOURCE_REUSE_FROM_PROTO: dict[int, RuntimeBindingSourceReuseMode] = { int(value): key for key, value in _SOURCE_REUSE_TO_PROTO.items() } -class ServingBindingSourceMemberRef(BaseModel): +class RuntimeBindingSourceMemberRef(BaseModel): model_config = ConfigDict(frozen=True) - member: ServingBindingMemberRef + member: RuntimeBindingMemberRef artifact_ref: str serving_manifest_ref: str | None = None tensor_schema_hash: str | None = None target_layout_hash: str | None = None @model_validator(mode="after") - def _validate_source_member(self) -> "ServingBindingSourceMemberRef": + def _validate_source_member(self) -> "RuntimeBindingSourceMemberRef": if not self.artifact_ref: raise ValueError("artifact_ref must not be empty") for field_name in ( @@ -1869,9 +1874,9 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceMemberRef: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingSourceMemberRef - ) -> "ServingBindingSourceMemberRef": + ) -> "RuntimeBindingSourceMemberRef": return cls( - member=ServingBindingMemberRef.from_proto(proto.member), + member=RuntimeBindingMemberRef.from_proto(proto.member), artifact_ref=str(proto.artifact_ref), serving_manifest_ref=( str(proto.serving_manifest_ref) @@ -1891,21 +1896,21 @@ def from_proto( ) -class ServingBindingSourceRef(BaseModel): +class RuntimeBindingSourceRef(BaseModel): model_config = ConfigDict(frozen=True) - source_kind: ServingBindingSourceKind + source_kind: RuntimeBindingSourceKind artifact_selection_digest: str source_artifact_ref: str | None = None source_schema_hash: str representation_contract_hash: str | None = None - serving_build_digest: str | None = None + runtime_build_digest: str | None = None tensor_schema_hash: str | None = None - 
topology: ServingTopologyRef | None = None - members: tuple[ServingBindingSourceMemberRef, ...] = () + topology: RuntimeTopologyRef | None = None + members: tuple[RuntimeBindingSourceMemberRef, ...] = () @model_validator(mode="after") - def _validate_source(self) -> "ServingBindingSourceRef": + def _validate_source(self) -> "RuntimeBindingSourceRef": if not self.artifact_selection_digest: raise ValueError("artifact_selection_digest must not be empty") if not self.source_schema_hash: @@ -1917,7 +1922,7 @@ def _validate_source(self) -> "ServingBindingSourceRef": ) if self.members: raise ValueError("checkpoint_artifact sources must not carry members") - if self.source_kind == "serving_artifact_set": + if self.source_kind == "runtime_artifact_set": if self.topology is None: raise ValueError( "topology is required for serving_artifact_set sources" @@ -1938,8 +1943,8 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceRef: proto.source_artifact_ref = str(self.source_artifact_ref) if self.representation_contract_hash is not None: proto.representation_contract_hash = str(self.representation_contract_hash) - if self.serving_build_digest is not None: - proto.serving_build_digest = str(self.serving_build_digest) + if self.runtime_build_digest is not None: + proto.serving_build_digest = str(self.runtime_build_digest) if self.tensor_schema_hash is not None: proto.tensor_schema_hash = str(self.tensor_schema_hash) if self.topology is not None: @@ -1950,10 +1955,10 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceRef: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingSourceRef - ) -> "ServingBindingSourceRef": + ) -> "RuntimeBindingSourceRef": source_kind = _SOURCE_KIND_FROM_PROTO.get(int(proto.source_kind)) if source_kind is None: - raise ValueError("ServingBindingSourceRef source_kind is required") + raise ValueError("RuntimeBindingSourceRef source_kind is required") return cls( source_kind=source_kind, 
artifact_selection_digest=str(proto.artifact_selection_digest), @@ -1968,7 +1973,7 @@ def from_proto( if proto.HasField("representation_contract_hash") else None ), - serving_build_digest=( + runtime_build_digest=( str(proto.serving_build_digest) if proto.HasField("serving_build_digest") else None @@ -1979,36 +1984,36 @@ def from_proto( else None ), topology=( - ServingTopologyRef.from_proto(proto.topology) + RuntimeTopologyRef.from_proto(proto.topology) if proto.HasField("topology") else None ), members=tuple( - ServingBindingSourceMemberRef.from_proto(member) + RuntimeBindingSourceMemberRef.from_proto(member) for member in proto.members ), ) -class ServingBindingSourceReuseDecision(BaseModel): +class RuntimeBindingSourceReuseDecision(BaseModel): model_config = ConfigDict(frozen=True) - mode: ServingBindingSourceReuseMode + mode: RuntimeBindingSourceReuseMode representation_contract_hash: str | None = None work_plan_hash: str | None = None reason: str | None = None @model_validator(mode="after") - def _validate_reuse(self) -> "ServingBindingSourceReuseDecision": + def _validate_reuse(self) -> "RuntimeBindingSourceReuseDecision": for field_name in ("representation_contract_hash", "work_plan_hash", "reason"): value = getattr(self, field_name) if value is not None and not value: raise ValueError(f"{field_name} must not be empty when provided") - if self.mode == "serving_transform_required" and not ( + if self.mode == "runtime_transform_required" and not ( self.work_plan_hash or self.reason ): raise ValueError( - "serving_transform_required requires work_plan_hash or reason" + "runtime_transform_required requires work_plan_hash or reason" ) if self.mode == "unsupported" and not self.reason: raise ValueError("unsupported source reuse requires reason") @@ -2029,10 +2034,10 @@ def to_proto(self) -> operation_pb2.ServingBindingSourceReuseDecision: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingSourceReuseDecision - ) -> 
"ServingBindingSourceReuseDecision": + ) -> "RuntimeBindingSourceReuseDecision": mode = _SOURCE_REUSE_FROM_PROTO.get(int(proto.mode)) if mode is None: - raise ValueError("ServingBindingSourceReuseDecision mode is required") + raise ValueError("RuntimeBindingSourceReuseDecision mode is required") return cls( mode=mode, representation_contract_hash=( @@ -2047,45 +2052,45 @@ def from_proto( ) -def plan_serving_binding_source_reuse( +def plan_runtime_binding_source_reuse( *, - source: ServingBindingSourceRef, - topology: ServingTopologyRef, - member: ServingBindingMemberRef, + source: RuntimeBindingSourceRef, + topology: RuntimeTopologyRef, + member: RuntimeBindingMemberRef, tensor_schema_hash: str, target_layout_hash: str, representation_contract_hash: str | None = None, -) -> ServingBindingSourceReuseDecision: +) -> RuntimeBindingSourceReuseDecision: if source.source_kind == "checkpoint_artifact": - return ServingBindingSourceReuseDecision( - mode="checkpoint_to_serving", + return RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", representation_contract_hash=representation_contract_hash, ) - if source.source_kind not in {"serving_artifact", "serving_artifact_set"}: - return ServingBindingSourceReuseDecision( + if source.source_kind not in {"runtime_artifact", "runtime_artifact_set"}: + return RuntimeBindingSourceReuseDecision( mode="unsupported", - reason=f"unsupported serving binding source kind: {source.source_kind}", + reason=f"unsupported runtime binding source kind: {source.source_kind}", ) if ( representation_contract_hash is not None and source.representation_contract_hash is not None and representation_contract_hash != source.representation_contract_hash ): - return ServingBindingSourceReuseDecision( - mode="serving_transform_required", + return RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", reason="source representation contract does not match target", ) if source.topology is not None and source.topology != topology: 
- return ServingBindingSourceReuseDecision( - mode="serving_transform_required", + return RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", reason="source topology does not match target topology", ) if ( source.tensor_schema_hash is not None and source.tensor_schema_hash != tensor_schema_hash ): - return ServingBindingSourceReuseDecision( - mode="serving_transform_required", + return RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", reason="source tensor schema does not match target tensor schema", ) matching_members = [ @@ -2093,43 +2098,43 @@ def plan_serving_binding_source_reuse( for source_member in source.members if source_member.member == member ] - if source.source_kind == "serving_artifact_set" and not matching_members: - return ServingBindingSourceReuseDecision( - mode="serving_transform_required", - reason="source serving set does not contain target member", + if source.source_kind == "runtime_artifact_set" and not matching_members: + return RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", + reason="source runtime set does not contain target member", ) for source_member in matching_members: if ( source_member.tensor_schema_hash is not None and source_member.tensor_schema_hash != tensor_schema_hash ): - return ServingBindingSourceReuseDecision( - mode="serving_transform_required", + return RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", reason="source member tensor schema does not match target", ) if ( source_member.target_layout_hash is not None and source_member.target_layout_hash != target_layout_hash ): - return ServingBindingSourceReuseDecision( - mode="serving_transform_required", + return RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", reason="source member layout does not match target layout", ) - return ServingBindingSourceReuseDecision( - mode="serving_direct_member_copy", + return RuntimeBindingSourceReuseDecision( + 
mode="runtime_direct_member_copy", representation_contract_hash=representation_contract_hash or source.representation_contract_hash, ) -class ServingBindingResolvedLayout(BaseModel): +class RuntimeBindingResolvedLayout(BaseModel): model_config = ConfigDict(frozen=True) binding_layout_id: str - source: ServingBindingSourceRef - source_reuse: ServingBindingSourceReuseDecision - topology: ServingTopologyRef - member: ServingBindingMemberRef + source: RuntimeBindingSourceRef + source_reuse: RuntimeBindingSourceReuseDecision + topology: RuntimeTopologyRef + member: RuntimeBindingMemberRef target_layout: bytes target_index_bytes: bytes target_layout_hash: str @@ -2140,7 +2145,7 @@ class ServingBindingResolvedLayout(BaseModel): dst_specs_bytes: bytes | None = None @model_validator(mode="after") - def _validate_layout(self) -> "ServingBindingResolvedLayout": + def _validate_layout(self) -> "RuntimeBindingResolvedLayout": if not self.binding_layout_id: raise ValueError("binding_layout_id must not be empty") if not self.target_layout: @@ -2153,13 +2158,13 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout": raise ValueError("tensor_schema_hash must not be empty") if not self.spec_digest: raise ValueError("spec_digest must not be empty") - if self.source_reuse.mode == "serving_direct_member_copy": + if self.source_reuse.mode == "runtime_direct_member_copy": if self.source.source_kind not in { - "serving_artifact", - "serving_artifact_set", + "runtime_artifact", + "runtime_artifact_set", }: raise ValueError( - "serving_direct_member_copy requires a serving artifact source" + "runtime_direct_member_copy requires a runtime artifact source" ) if ( self.source.representation_contract_hash is not None @@ -2174,7 +2179,7 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout": self.source.tensor_schema_hash != self.tensor_schema_hash ): raise ValueError( - "serving_direct_member_copy tensor_schema_hash must match target" + "runtime_direct_member_copy 
tensor_schema_hash must match target" ) matching_members = [ source_member @@ -2182,11 +2187,11 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout": if source_member.member == self.member ] if ( - self.source.source_kind == "serving_artifact_set" + self.source.source_kind == "runtime_artifact_set" and not matching_members ): raise ValueError( - "serving_direct_member_copy requires a matching source member" + "runtime_direct_member_copy requires a matching source member" ) for source_member in matching_members: if ( @@ -2194,14 +2199,14 @@ def _validate_layout(self) -> "ServingBindingResolvedLayout": and source_member.target_layout_hash != self.target_layout_hash ): raise ValueError( - "serving_direct_member_copy target_layout_hash must match source member" + "runtime_direct_member_copy target_layout_hash must match source member" ) if ( source_member.tensor_schema_hash is not None and source_member.tensor_schema_hash != self.tensor_schema_hash ): raise ValueError( - "serving_direct_member_copy tensor_schema_hash must match source member" + "runtime_direct_member_copy tensor_schema_hash must match source member" ) return self @@ -2229,15 +2234,15 @@ def to_proto(self) -> operation_pb2.ServingBindingResolvedLayout: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingResolvedLayout - ) -> "ServingBindingResolvedLayout": + ) -> "RuntimeBindingResolvedLayout": return cls( binding_layout_id=str(proto.binding_layout_id), - source=ServingBindingSourceRef.from_proto(proto.source), - source_reuse=ServingBindingSourceReuseDecision.from_proto( + source=RuntimeBindingSourceRef.from_proto(proto.source), + source_reuse=RuntimeBindingSourceReuseDecision.from_proto( proto.source_reuse ), - topology=ServingTopologyRef.from_proto(proto.topology), - member=ServingBindingMemberRef.from_proto(proto.member), + topology=RuntimeTopologyRef.from_proto(proto.topology), + member=RuntimeBindingMemberRef.from_proto(proto.member), 
target_layout=bytes(proto.target_layout), target_index_bytes=bytes(proto.target_index_bytes), target_layout_hash=str(proto.target_layout_hash), @@ -2261,22 +2266,22 @@ def from_proto( ) -class ServingBindingTarget(BaseModel): +class RealizationTarget(BaseModel): model_config = ConfigDict(frozen=True) runtime: str device: str | int device_uuid: str | None = None - source: ServingBindingSourceRef - topology: ServingTopologyRef - member: ServingBindingMemberRef + source: RuntimeBindingSourceRef + topology: RuntimeTopologyRef + member: RuntimeBindingMemberRef model_config_digest: str load_config_digest: str | None = None - serving_build_digest: str - resolved_layout: ServingBindingResolvedLayout + runtime_build_digest: str + resolved_layout: RuntimeBindingResolvedLayout @model_validator(mode="after") - def _validate_target(self) -> "ServingBindingTarget": + def _validate_target(self) -> "RealizationTarget": if not self.runtime: raise ValueError("runtime must not be empty") if str(self.device) == "": @@ -2287,19 +2292,19 @@ def _validate_target(self) -> "ServingBindingTarget": raise ValueError("model_config_digest must not be empty") if self.load_config_digest is not None and not self.load_config_digest: raise ValueError("load_config_digest must not be empty when provided") - if not self.serving_build_digest: - raise ValueError("serving_build_digest must not be empty") + if not self.runtime_build_digest: + raise ValueError("runtime_build_digest must not be empty") if self.source != self.resolved_layout.source: raise ValueError("resolved_layout.source must match target source") if self.source.topology is not None and self.source.topology != self.topology: raise ValueError("source topology must match target topology when provided") if ( - self.resolved_layout.source_reuse.mode == "serving_direct_member_copy" - and self.source.serving_build_digest is not None - and self.source.serving_build_digest != self.serving_build_digest + self.resolved_layout.source_reuse.mode == 
"runtime_direct_member_copy" + and self.source.runtime_build_digest is not None + and self.source.runtime_build_digest != self.runtime_build_digest ): raise ValueError( - "serving_direct_member_copy serving_build_digest must match source" + "runtime_direct_member_copy runtime_build_digest must match source" ) if self.topology != self.resolved_layout.topology: raise ValueError("resolved_layout.topology must match target topology") @@ -2312,7 +2317,7 @@ def to_proto(self) -> operation_pb2.ServingBindingTarget: runtime=str(self.runtime), device=str(self.device), model_config_digest=str(self.model_config_digest), - serving_build_digest=str(self.serving_build_digest), + serving_build_digest=str(self.runtime_build_digest), ) if self.device_uuid is not None: proto.device_uuid = str(self.device_uuid) @@ -2327,40 +2332,40 @@ def to_proto(self) -> operation_pb2.ServingBindingTarget: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingTarget - ) -> "ServingBindingTarget": + ) -> "RealizationTarget": return cls( runtime=str(proto.runtime), device=str(proto.device), device_uuid=str(proto.device_uuid) if proto.HasField("device_uuid") else None, - source=ServingBindingSourceRef.from_proto(proto.source), - topology=ServingTopologyRef.from_proto(proto.topology), - member=ServingBindingMemberRef.from_proto(proto.member), + source=RuntimeBindingSourceRef.from_proto(proto.source), + topology=RuntimeTopologyRef.from_proto(proto.topology), + member=RuntimeBindingMemberRef.from_proto(proto.member), model_config_digest=str(proto.model_config_digest), load_config_digest=( str(proto.load_config_digest) if proto.HasField("load_config_digest") else None ), - serving_build_digest=str(proto.serving_build_digest), - resolved_layout=ServingBindingResolvedLayout.from_proto( + runtime_build_digest=str(proto.serving_build_digest), + resolved_layout=RuntimeBindingResolvedLayout.from_proto( proto.resolved_layout ), ) -class ServingBindingSetTarget(BaseModel): +class 
RealizationTargetSet(BaseModel): model_config = ConfigDict(frozen=True) runtime: str - source: ServingBindingSourceRef - topology: ServingTopologyRef + source: RuntimeBindingSourceRef + topology: RuntimeTopologyRef group_id: str - members: tuple[ServingBindingTarget, ...] + members: tuple[RealizationTarget, ...] @model_validator(mode="after") - def _validate_set_target(self) -> "ServingBindingSetTarget": + def _validate_set_target(self) -> "RealizationTargetSet": if not self.runtime: raise ValueError("runtime must not be empty") if not self.group_id: @@ -2391,40 +2396,40 @@ def to_proto(self) -> operation_pb2.ServingBindingSetTarget: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingSetTarget - ) -> "ServingBindingSetTarget": + ) -> "RealizationTargetSet": return cls( runtime=str(proto.runtime), - source=ServingBindingSourceRef.from_proto(proto.source), - topology=ServingTopologyRef.from_proto(proto.topology), + source=RuntimeBindingSourceRef.from_proto(proto.source), + topology=RuntimeTopologyRef.from_proto(proto.topology), group_id=str(proto.group_id), members=tuple( - ServingBindingTarget.from_proto(member) for member in proto.members + RealizationTarget.from_proto(member) for member in proto.members ), ) -class ServingBindingResolvedSpecCacheEntry(BaseModel): +class RuntimeRealizationSpecCacheEntry(BaseModel): model_config = ConfigDict(frozen=True) schema_version: int cache_key_digest: str spec_digest: str runtime: str - source: ServingBindingSourceRef - source_reuse: ServingBindingSourceReuseDecision - topology: ServingTopologyRef - member: ServingBindingMemberRef + source: RuntimeBindingSourceRef + source_reuse: RuntimeBindingSourceReuseDecision + topology: RuntimeTopologyRef + member: RuntimeBindingMemberRef source_schema_hash: str model_config_digest: str load_config_digest: str | None = None - serving_build_digest: str + runtime_build_digest: str binding_layout_id: str target_layout_hash: str tensor_schema_hash: str blob_refs: 
Mapping[str, BlobRef] @model_validator(mode="after") - def _validate_cache_entry(self) -> "ServingBindingResolvedSpecCacheEntry": + def _validate_cache_entry(self) -> "RuntimeRealizationSpecCacheEntry": if int(self.schema_version) <= 0: raise ValueError("schema_version must be positive") for field_name in ( @@ -2433,7 +2438,7 @@ def _validate_cache_entry(self) -> "ServingBindingResolvedSpecCacheEntry": "runtime", "source_schema_hash", "model_config_digest", - "serving_build_digest", + "runtime_build_digest", "binding_layout_id", "target_layout_hash", "tensor_schema_hash", @@ -2453,7 +2458,7 @@ def canonical_key_json(self) -> str: "load_config_digest": self.load_config_digest, "topology": self.topology.model_dump(mode="json", exclude_none=True), "member": self.member.model_dump(mode="json", exclude_none=True), - "serving_build_digest": self.serving_build_digest, + "runtime_build_digest": self.runtime_build_digest, "source": self.source.model_dump(mode="json", exclude_none=True), "source_reuse": self.source_reuse.model_dump( mode="json", exclude_none=True @@ -2472,7 +2477,7 @@ def canonical_spec_core_json(self) -> str: "target_layout_hash": self.target_layout_hash, "tensor_schema_hash": self.tensor_schema_hash, "source_schema_hash": self.source_schema_hash, - "serving_build_digest": self.serving_build_digest, + "runtime_build_digest": self.runtime_build_digest, "source_reuse": self.source_reuse.model_dump( mode="json", exclude_none=True ), @@ -2496,7 +2501,7 @@ def to_proto(self) -> operation_pb2.ServingBindingResolvedSpecCacheEntry: runtime=str(self.runtime), source_schema_hash=str(self.source_schema_hash), model_config_digest=str(self.model_config_digest), - serving_build_digest=str(self.serving_build_digest), + serving_build_digest=str(self.runtime_build_digest), binding_layout_id=str(self.binding_layout_id), target_layout_hash=str(self.target_layout_hash), tensor_schema_hash=str(self.tensor_schema_hash), @@ -2514,18 +2519,18 @@ def to_proto(self) -> 
operation_pb2.ServingBindingResolvedSpecCacheEntry: @classmethod def from_proto( cls, proto: operation_pb2.ServingBindingResolvedSpecCacheEntry - ) -> "ServingBindingResolvedSpecCacheEntry": + ) -> "RuntimeRealizationSpecCacheEntry": return cls( schema_version=int(proto.schema_version), cache_key_digest=str(proto.cache_key_digest), spec_digest=str(proto.spec_digest), runtime=str(proto.runtime), - source=ServingBindingSourceRef.from_proto(proto.source), - source_reuse=ServingBindingSourceReuseDecision.from_proto( + source=RuntimeBindingSourceRef.from_proto(proto.source), + source_reuse=RuntimeBindingSourceReuseDecision.from_proto( proto.source_reuse ), - topology=ServingTopologyRef.from_proto(proto.topology), - member=ServingBindingMemberRef.from_proto(proto.member), + topology=RuntimeTopologyRef.from_proto(proto.topology), + member=RuntimeBindingMemberRef.from_proto(proto.member), source_schema_hash=str(proto.source_schema_hash), model_config_digest=str(proto.model_config_digest), load_config_digest=( @@ -2533,7 +2538,7 @@ def from_proto( if proto.HasField("load_config_digest") else None ), - serving_build_digest=str(proto.serving_build_digest), + runtime_build_digest=str(proto.serving_build_digest), binding_layout_id=str(proto.binding_layout_id), target_layout_hash=str(proto.target_layout_hash), tensor_schema_hash=str(proto.tensor_schema_hash), @@ -2614,7 +2619,7 @@ class BindingReservationCapability(BaseModel): daemon_id: str daemon_session_id: str device_uuid: str - member: ServingBindingMemberRef + member: RuntimeBindingMemberRef reservation_bytes: int scope_digest: str expires_at_ms: int | None = None @@ -2661,7 +2666,7 @@ def from_proto( daemon_id=str(proto.daemon_id), daemon_session_id=str(proto.daemon_session_id), device_uuid=str(proto.device_uuid), - member=ServingBindingMemberRef.from_proto(proto.member), + member=RuntimeBindingMemberRef.from_proto(proto.member), reservation_bytes=int(proto.reservation_bytes), scope_digest=str(proto.scope_digest), 
expires_at_ms=( @@ -2718,7 +2723,7 @@ def from_proto( ) -class PrefetchedServingBinding(BaseModel): +class PrefetchHandoff(BaseModel): model_config = ConfigDict(frozen=True) local_serving_ref: str | None = None @@ -2726,10 +2731,10 @@ class PrefetchedServingBinding(BaseModel): daemon_id: str daemon_session_id: str device_uuid: str - member: ServingBindingMemberRef + member: RuntimeBindingMemberRef reservation_bytes: int reservation_capability: BindingReservationCapability - readiness: ServingBindingReadiness + readiness: RuntimeBindingReadiness verification_state: BindingValueVerificationState serving_artifact_id: str | None = None expires_at_ms: int | None = None @@ -2738,7 +2743,7 @@ class PrefetchedServingBinding(BaseModel): report: object | None = Field(default=None, exclude=True, repr=False) @model_validator(mode="after") - def _validate_result(self) -> "PrefetchedServingBinding": + def _validate_result(self) -> "PrefetchHandoff": if self.local_serving_ref is not None and not self.local_serving_ref: raise ValueError("local_serving_ref must not be empty when provided") for field_name in ("daemon_id", "daemon_session_id", "device_uuid"): @@ -2799,7 +2804,7 @@ def to_proto(self) -> operation_pb2.PrefetchServingBindingResult: @classmethod def from_proto( cls, proto: operation_pb2.PrefetchServingBindingResult - ) -> "PrefetchedServingBinding": + ) -> "PrefetchHandoff": readiness = _SERVING_READINESS_FROM_PROTO.get(int(proto.readiness)) if readiness is None: raise ValueError("PrefetchServingBindingResult readiness is required") @@ -2831,7 +2836,7 @@ def from_proto( daemon_id=str(proto.daemon_id), daemon_session_id=str(proto.daemon_session_id), device_uuid=str(proto.device_uuid), - member=ServingBindingMemberRef.from_proto(proto.member), + member=RuntimeBindingMemberRef.from_proto(proto.member), reservation_bytes=int(proto.reservation_bytes), reservation_capability=BindingReservationCapability.from_proto( proto.reservation_capability @@ -2853,10 +2858,10 @@ def 
from_proto( ) -class PrefetchedServingBindingMemberFailure(BaseModel): +class PrefetchHandoffMemberFailure(BaseModel): model_config = ConfigDict(frozen=True) - member: ServingBindingMemberRef + member: RuntimeBindingMemberRef code: str message: str phase: str | None = None @@ -2864,7 +2869,7 @@ class PrefetchedServingBindingMemberFailure(BaseModel): spec_digest: str | None = None @model_validator(mode="after") - def _validate_failure(self) -> "PrefetchedServingBindingMemberFailure": + def _validate_failure(self) -> "PrefetchHandoffMemberFailure": if not self.code: raise ValueError("code must not be empty") if not self.message: @@ -2894,9 +2899,9 @@ def to_proto(self) -> operation_pb2.PrefetchServingBindingMemberFailure: @classmethod def from_proto( cls, proto: operation_pb2.PrefetchServingBindingMemberFailure - ) -> "PrefetchedServingBindingMemberFailure": + ) -> "PrefetchHandoffMemberFailure": return cls( - member=ServingBindingMemberRef.from_proto(proto.member), + member=RuntimeBindingMemberRef.from_proto(proto.member), code=str(proto.code), message=str(proto.message), phase=str(proto.phase) if proto.HasField("phase") else None, @@ -2911,21 +2916,21 @@ def from_proto( ) -class PrefetchedServingBindingSet(BaseModel): +class PrefetchHandoffSet(BaseModel): model_config = ConfigDict(frozen=True) runtime: str - topology: ServingTopologyRef + topology: RuntimeTopologyRef group_id: str - members: tuple[PrefetchedServingBinding, ...] - readiness: ServingBindingReadiness + members: tuple[PrefetchHandoff, ...] + readiness: RuntimeBindingReadiness expires_at_ms: int | None = None - member_failures: tuple[PrefetchedServingBindingMemberFailure, ...] = () + member_failures: tuple[PrefetchHandoffMemberFailure, ...] 
= () partial: bool = False report: object | None = Field(default=None, exclude=True, repr=False) @model_validator(mode="after") - def _validate_result_set(self) -> "PrefetchedServingBindingSet": + def _validate_result_set(self) -> "PrefetchHandoffSet": if not self.runtime: raise ValueError("runtime must not be empty") if not self.group_id: @@ -2935,7 +2940,7 @@ def _validate_result_set(self) -> "PrefetchedServingBindingSet": if self.expires_at_ms is not None and int(self.expires_at_ms) < 0: raise ValueError("expires_at_ms must be non-negative") if self.partial and not self.member_failures: - raise ValueError("partial serving binding set requires member_failures") + raise ValueError("partial runtime binding set requires member_failures") success_member_ids = {member.member.member_id for member in self.members} failed_member_ids = { failure.member.member_id for failure in self.member_failures @@ -2943,7 +2948,7 @@ def _validate_result_set(self) -> "PrefetchedServingBindingSet": overlap = success_member_ids & failed_member_ids if overlap: raise ValueError( - "serving binding set member cannot be both success and failure" + "runtime binding set member cannot be both success and failure" ) return self @@ -2966,44 +2971,44 @@ def to_proto(self) -> operation_pb2.PrefetchServingBindingSetResult: @classmethod def from_proto( cls, proto: operation_pb2.PrefetchServingBindingSetResult - ) -> "PrefetchedServingBindingSet": + ) -> "PrefetchHandoffSet": readiness = _SERVING_READINESS_FROM_PROTO.get(int(proto.readiness)) if readiness is None: raise ValueError("PrefetchServingBindingSetResult readiness is required") return cls( runtime=str(proto.runtime), - topology=ServingTopologyRef.from_proto(proto.topology), + topology=RuntimeTopologyRef.from_proto(proto.topology), group_id=str(proto.group_id), members=tuple( - PrefetchedServingBinding.from_proto(member) for member in proto.members + PrefetchHandoff.from_proto(member) for member in proto.members ), readiness=readiness, 
expires_at_ms=( int(proto.expires_at_ms) if proto.HasField("expires_at_ms") else None ), member_failures=tuple( - PrefetchedServingBindingMemberFailure.from_proto(failure) + PrefetchHandoffMemberFailure.from_proto(failure) for failure in proto.member_failures ), partial=bool(proto.partial), ) -class ServingPublicationSubject(BaseModel): +class RuntimePublicationSubject(BaseModel): model_config = ConfigDict(frozen=True) serving_artifact_id: str | None = None binding_value_ref: BindingValueRef | None = None @model_validator(mode="after") - def _validate_subject(self) -> "ServingPublicationSubject": + def _validate_subject(self) -> "RuntimePublicationSubject": artifact_id = self.serving_artifact_id binding_value_ref = self.binding_value_ref if artifact_id is not None and not artifact_id: raise ValueError("serving_artifact_id must not be empty") if (artifact_id is None) == (binding_value_ref is None): raise ValueError( - "ServingPublicationSubject requires exactly one of serving_artifact_id or binding_value_ref" + "RuntimePublicationSubject requires exactly one of serving_artifact_id or binding_value_ref" ) return self @@ -3048,9 +3053,8 @@ def to_store_proto(self) -> publication_pb2.ServingPublicationSubject: @classmethod def from_proto( cls, - proto: publication_pb2.ServingPublicationSubject - | publication_pb2.ServingPublicationSubject, - ) -> "ServingPublicationSubject": + proto: publication_pb2.ServingPublicationSubject, + ) -> "RuntimePublicationSubject": ref_case = proto.WhichOneof("ref") if ref_case == "serving_artifact_id": return cls(serving_artifact_id=str(proto.serving_artifact_id)) @@ -3058,13 +3062,13 @@ def from_proto( return cls( binding_value_ref=BindingValueRef.from_proto(proto.binding_value) ) - raise ValueError("ServingPublicationSubject requires exactly one ref") + raise ValueError("RuntimePublicationSubject requires exactly one ref") class RepresentationPublishContract(BaseModel): model_config = ConfigDict(frozen=True) - subject: 
ServingPublicationSubject + subject: RuntimePublicationSubject serving_manifest_ref: str representation_contract_hash: str serving_build_digest: str @@ -3117,7 +3121,7 @@ def from_proto( raise ValueError( "RepresentationPublishContract requires a serving publication subject" ) - subject = ServingPublicationSubject.from_proto(proto.subject) + subject = RuntimePublicationSubject.from_proto(proto.subject) return cls( subject=subject, serving_manifest_ref=str(proto.serving_manifest_ref), @@ -3130,7 +3134,7 @@ def from_proto( def validate_against_manifest( self, - manifest: ServingArtifactManifest, + manifest: RuntimeArtifactManifest, ) -> None: if manifest.serving_manifest_ref != self.serving_manifest_ref: raise ValueError( @@ -3157,13 +3161,13 @@ def to_runtime_policy( self, *, require_manifest: bool = True, - ) -> ServingRuntimePolicy: + ) -> RuntimeArtifactPolicy: serving_artifact_id = self.serving_artifact_id if serving_artifact_id is None: raise ValueError( "binding publication subjects do not resolve to a serving runtime policy until closeout promotion completes" ) - return ServingRuntimePolicy( + return RuntimeArtifactPolicy( require_manifest=bool(require_manifest), serving_manifest_ref=str(self.serving_manifest_ref), expected_representation_contract_hash=str( @@ -3194,7 +3198,7 @@ def from_publication_proto( raise ValueError( "RepresentationPublishContract requires a serving publication subject" ) - subject = ServingPublicationSubject.from_proto(proto.subject) + subject = RuntimePublicationSubject.from_proto(proto.subject) return cls( subject=subject, serving_manifest_ref=str(proto.serving_manifest_ref), @@ -3316,7 +3320,7 @@ class RepresentationPublishSpec(BaseModel): serving_artifact_id: str | None = None serving_manifest_ref: str - serving_manifest: ServingArtifactManifest + serving_manifest: RuntimeArtifactManifest serving_manifest_bytes: bytes canonical_index: object | None = None representation_publish_contract: RepresentationPublishContract @@ -3327,7 
+3331,7 @@ class RepresentationPublishSpec(BaseModel): layout_id: str | None = None requirements: AssemblyRequirementSetRef | None = None readiness_policy: AssemblyReadinessPolicy | None = None - admission_facts: ServingAdmissionFacts | None = None + admission_facts: RuntimeAdmissionFacts | None = None @model_validator(mode="after") def _validate_representation_publish_spec(self) -> "RepresentationPublishSpec": @@ -3338,7 +3342,7 @@ def _validate_representation_publish_spec(self) -> "RepresentationPublishSpec": "canonical_full", }: raise ValueError("contract_family must be one of: pp, ep, canonical_full") - manifest_from_bytes = ServingArtifactManifest.from_bytes( + manifest_from_bytes = RuntimeArtifactManifest.from_bytes( self.serving_manifest_bytes ) if manifest_from_bytes != self.serving_manifest: @@ -3408,11 +3412,11 @@ def _validate_representation_publish_spec(self) -> "RepresentationPublishSpec": def manifest_tensor_name(self) -> str: return parse_serving_manifest_ref(self.serving_manifest_ref) - def require_serving_runtime_policy( + def require_runtime_artifact_policy( self, *, require_manifest: bool = True, - ) -> ServingRuntimePolicy: + ) -> RuntimeArtifactPolicy: if self.admission_facts is not None: self.admission_facts.require_runtime_bind_swap_ready() return self.representation_publish_contract.to_runtime_policy( @@ -3465,7 +3469,7 @@ def from_proto( representation_publish_contract=representation_publish_contract, ) manifest_bytes = bytes(proto.serving_manifest_bytes) - manifest = ServingArtifactManifest.from_bytes(manifest_bytes) + manifest = RuntimeArtifactManifest.from_bytes(manifest_bytes) return cls( serving_artifact_id=representation_publish_contract.serving_artifact_id, serving_manifest_ref=representation_publish_contract.serving_manifest_ref, @@ -3491,7 +3495,7 @@ def from_proto( else None ), admission_facts=( - ServingAdmissionFacts.from_publication_proto(proto.admission_facts) + 
RuntimeAdmissionFacts.from_publication_proto(proto.admission_facts) if proto.HasField("admission_facts") else None ), @@ -3671,7 +3675,7 @@ class PublishedModelVersion(BaseModel): serving_manifest_ref: str | None = None serving_execution_diagnostics: ExecutionDiagnostics | None = None - def require_serving_runtime_policy(self) -> ServingRuntimePolicy: + def require_runtime_artifact_policy(self) -> RuntimeArtifactPolicy: if not self.serving_manifest_ref: raise ValueError( "PublishedModelVersion does not carry serving_manifest_ref" @@ -3684,7 +3688,7 @@ def require_serving_runtime_policy(self) -> ServingRuntimePolicy: raise ValueError( "PublishedModelVersion does not carry serving_build_digest" ) - return ServingRuntimePolicy( + return RuntimeArtifactPolicy( require_manifest=True, serving_manifest_ref=str(self.serving_manifest_ref), expected_representation_contract_hash=str( @@ -3694,32 +3698,32 @@ def require_serving_runtime_policy(self) -> ServingRuntimePolicy: ) -ServingRuntimePolicyInput = Union[ - ServingRuntimePolicy, - ServingArtifactManifest, +RuntimeArtifactPolicyInput = Union[ + RuntimeArtifactPolicy, + RuntimeArtifactManifest, RepresentationPublishContract, RepresentationPublishSpec, PublishedModelVersion, ] -def coerce_serving_runtime_policy( - value: ServingRuntimePolicyInput | None, -) -> ServingRuntimePolicy | None: +def coerce_runtime_artifact_policy( + value: RuntimeArtifactPolicyInput | None, +) -> RuntimeArtifactPolicy | None: if value is None: return None - if isinstance(value, ServingRuntimePolicy): + if isinstance(value, RuntimeArtifactPolicy): return value - if isinstance(value, ServingArtifactManifest): + if isinstance(value, RuntimeArtifactManifest): return value.to_runtime_policy() if isinstance(value, RepresentationPublishContract): return value.to_runtime_policy() if isinstance(value, RepresentationPublishSpec): - return value.require_serving_runtime_policy() + return value.require_runtime_artifact_policy() if isinstance(value, 
PublishedModelVersion): - return value.require_serving_runtime_policy() + return value.require_runtime_artifact_policy() raise TypeError( - "serving runtime policy requires ServingRuntimePolicy, ServingArtifactManifest, " + "runtime artifact policy requires RuntimeArtifactPolicy, RuntimeArtifactManifest, " "RepresentationPublishContract, RepresentationPublishSpec, or PublishedModelVersion" ) @@ -3939,28 +3943,28 @@ class DeregisterArtifactOutcome(BaseModel): "BeginRegisterArtifactResult", "ArtifactDescriptor", "BindingValueRef", - "ServingBindingReadiness", - "ServingBindingSourceKind", - "ServingBindingSourceReuseMode", - "ServingTopologyRef", - "ServingBindingMemberRef", + "RuntimeBindingReadiness", + "RuntimeBindingSourceKind", + "RuntimeBindingSourceReuseMode", + "RuntimeTopologyRef", + "RuntimeBindingMemberRef", "BlobRef", - "ServingBindingSourceMemberRef", - "ServingBindingSourceRef", - "ServingBindingSourceReuseDecision", - "plan_serving_binding_source_reuse", - "ServingBindingResolvedLayout", - "ServingBindingTarget", - "ServingBindingSetTarget", - "ServingBindingResolvedSpecCacheEntry", + "RuntimeBindingSourceMemberRef", + "RuntimeBindingSourceRef", + "RuntimeBindingSourceReuseDecision", + "plan_runtime_binding_source_reuse", + "RuntimeBindingResolvedLayout", + "RealizationTarget", + "RealizationTargetSet", + "RuntimeRealizationSpecCacheEntry", "PrefetchRetentionPolicy", "BindingReservationCapability", "GroupRealizationAcquireRef", - "PrefetchedServingBinding", - "PrefetchedServingBindingMemberFailure", - "PrefetchedServingBindingSet", + "PrefetchHandoff", + "PrefetchHandoffMemberFailure", + "PrefetchHandoffSet", "BuilderMode", - "ServingPublicationSubject", + "RuntimePublicationSubject", "AssemblyCloseoutContract", "AssemblyAttemptRef", "AssemblyContractFamily", @@ -3977,15 +3981,15 @@ class DeregisterArtifactOutcome(BaseModel): "RepresentationPublishContract", "RepresentationPublishSpec", "PublicDiskSourceHandle", - "ServingAdmissionFacts", + 
"RuntimeArtifactBuildIntent", + "RuntimeArtifactManifest", + "RuntimeArtifactPolicy", + "RuntimeArtifactPolicyInput", + "RuntimeAdmissionFacts", "ViewRegistrationKind", "SealAssemblyResult", - "ServingArtifactManifest", - "ServingBuildIntent", "SERVING_BUILD_DIGEST_VERSION", - "ServingRuntimePolicy", - "ServingRuntimePolicyInput", - "ServingSupportLevel", + "RuntimeSupportLevel", "SERVING_MANIFEST_TENSOR_NAME", "PlanBase", "CoalescedPlan", @@ -4002,6 +4006,6 @@ class DeregisterArtifactOutcome(BaseModel): "VramRegionHandle", "DeregisterArtifactOutcome", "build_serving_manifest_ref", - "coerce_serving_runtime_policy", + "coerce_runtime_artifact_policy", "parse_serving_manifest_ref", ] diff --git a/tests/python/api/test_artifact_handle.py b/tests/python/api/test_artifact_handle.py index 091c5ad6..2ce63633 100644 --- a/tests/python/api/test_artifact_handle.py +++ b/tests/python/api/test_artifact_handle.py @@ -30,8 +30,8 @@ from tensorcast.proto.daemon.v2 import store_daemon_pb2 from tensorcast.types import ( BuilderMode, - ServingArtifactManifest, - ServingRuntimePolicy, + RuntimeArtifactManifest, + RuntimeArtifactPolicy, build_serving_manifest_ref, ) @@ -211,7 +211,7 @@ def __init__(self, client: _ClientStub) -> None: self._artifact_cache = ArtifactCache( daemon_endpoint="daemon", ttl_seconds=10, max_entries=8 ) - self._key_cache: dict[str, tuple[str | None, str | None]] = {} + self._key_cache: dict[str, tuple[str | None, str | None, int | None]] = {} self._client = client def ensure_client(self) -> _ClientStub: @@ -228,8 +228,10 @@ def invalidate_artifact( ) -> None: self._artifact_cache.invalidate_artifact(artifact_id or "", reason=reason) - def resolve_key_mapping_cached(self, *, key: str) -> tuple[str | None, str | None]: - return self._key_cache.get(key, (None, None)) + def resolve_key_mapping_cached( + self, *, key: str + ) -> tuple[str | None, str | None, int | None]: + return self._key_cache.get(key, (None, None, None)) def cache_key_mapping( self, @@ -237,10 
+239,11 @@ def cache_key_mapping( *, artifact_id: str | None, disk_path: str | None = None, + generation: int | None = None, ttl_override=None, ) -> None: del ttl_override - self._key_cache[key] = (artifact_id, disk_path) + self._key_cache[key] = (artifact_id, disk_path, generation) class _PipelineStub: @@ -718,7 +721,7 @@ def _fake_bind_owned(self, **kwargs): monkeypatch.setattr(Artifact, "_bind_owned", _fake_bind_owned) - manifest = ServingArtifactManifest( + manifest = RuntimeArtifactManifest( framework_name="torch", adapter_version="adapter-v1", serving_abi_version="abi-v1", @@ -733,12 +736,12 @@ def _fake_bind_owned(self, **kwargs): result = artifact.bind( device="cuda:0", - serving_runtime_policy=manifest, + runtime_artifact_policy=manifest, ) assert result is fake_binding assert captured["device"] == torch.device("cuda:0") - assert captured["serving_runtime_policy"] == ServingRuntimePolicy( + assert captured["runtime_artifact_policy"] == RuntimeArtifactPolicy( require_manifest=True, serving_manifest_ref="tensor:__alt_manifest__.json", expected_representation_contract_hash="bafkrepresentation", @@ -746,7 +749,7 @@ def _fake_bind_owned(self, **kwargs): ) -def test_tensor_dict_and_adopted_binding_share_source_selection_with_separate_target_digests( +def test_tensor_dict_and_mapped_bindings_share_source_selection_with_separate_target_digests( monkeypatch: pytest.MonkeyPatch, ) -> None: canonical_bytes, payload = _build_payload({"foo": torch.ones(2)}) @@ -800,6 +803,50 @@ def test_tensor_dict_and_adopted_binding_share_source_selection_with_separate_ta "last_source_bound_plan_diagnostics": None, }, )() + owned_binding_value = type( + "_OwnedBindingValueStub", + (), + { + "binding_id": "owned-binding", + "binding_layout_id": "bl1:owned", + "binding_value_id": "owned-value-1", + "seal_generation": 1, + "source_artifact_id": "aid", + "is_artifact_backed": True, + "verification_state": 0, + "is_published": False, + }, + )() + owned_layout = type( + "_OwnedLayoutStub", 
+ (), + { + "binding_layout_id": "bl1:owned", + "target_layout": store_daemon_pb2.TargetLayout( + view_id="mapped:v1:owned-target" + ), + "target_index_bytes": canonical_bytes, + "dst_specs": (), + }, + )() + fake_owned_binding = type( + "_OwnedBindingStub", + (), + { + "binding_id": "owned-binding", + "binding_layout_id": "bl1:owned", + "layout": owned_layout, + "current_value": owned_binding_value, + "staged_value": None, + "last_materialization_diagnostics": { + "source": "disk", + "total_bytes": 8, + "retry_reason_buckets": {}, + }, + "last_execution_diagnostics": None, + "last_source_bound_plan_diagnostics": None, + }, + )() def _fake_execute_bind_into(self, target_tensors, **kwargs): del self @@ -807,7 +854,16 @@ def _fake_execute_bind_into(self, target_tensors, **kwargs): captured.update(kwargs) return fake_binding + owned_captured: dict[str, object] = {} + + def _fake_execute_bind_owned(self, device, **kwargs): + del self + owned_captured["device"] = device + owned_captured.update(kwargs) + return fake_owned_binding + monkeypatch.setattr(Artifact, "_execute_bind_into", _fake_execute_bind_into) + monkeypatch.setattr(Artifact, "_execute_bind_owned", _fake_execute_bind_owned) tensor_handle = artifact.realize(ArtifactRealizationSpec.tensor_dict(device="cpu")) adopted_handle = artifact.realize( @@ -817,29 +873,49 @@ def _fake_execute_bind_into(self, target_tensors, **kwargs): packing="byte_space", ) ) + owned_handle = artifact.realize( + ArtifactRealizationSpec.binding( + device="cuda:0", + mapping=copy_plan, + packing="byte_space", + ) + ) tensor_report = tensor_handle.report adopted_report = adopted_handle.report + owned_report = owned_handle.report assert adopted_handle.binding() is fake_binding assert captured["mapping"] == copy_plan + assert owned_handle.binding() is fake_owned_binding + assert owned_captured["mapping"] == copy_plan assert ( adopted_report.source_selection_digest == tensor_report.source_selection_digest ) + assert 
owned_report.source_selection_digest == tensor_report.source_selection_digest assert adopted_report.target_layout_digest assert adopted_report.copy_plan_digest + assert owned_report.target_layout_digest == "binding-layout:bl1:owned" + assert owned_report.copy_plan_digest == "mapped:v1:owned-target" + assert owned_report.representation_admission is not None + assert owned_report.representation_admission.transform_required is True assert adopted_report.target_layout_digest != adopted_report.source_selection_digest assert adopted_report.copy_plan_digest != adopted_report.target_layout_digest + assert owned_report.target_layout_digest != owned_report.source_selection_digest + assert owned_report.copy_plan_digest != owned_report.target_layout_digest assert str(adopted_report.copy_plan_digest).startswith("mapped:v1:") assert adopted_report.target_plan is not None + assert owned_report.target_plan is not None assert adopted_report.target_plan.target_layout_digest == ( adopted_report.target_layout_digest ) assert ( adopted_report.target_plan.copy_plan_digest == adopted_report.copy_plan_digest ) + assert owned_report.target_plan.copy_plan_digest == owned_report.copy_plan_digest tensor_handle.close() adopted_handle.close() + owned_handle.close() def test_tensor_into_materializes_subset_only(): @@ -971,6 +1047,27 @@ def test_subset_clone_handles_multiple_identifiers(): assert clone.tensor_names == ("foo",) +def test_key_mapping_generation_flows_into_realization_selection_digest(): + canonical_bytes, payload = _build_payload({"foo": torch.ones(1)}) + runtime = _RuntimeStub(_ClientStub(canonical_bytes)) + runtime.cache_key_mapping("mapped-v7", artifact_id="aid", generation=7) + runtime.cache_key_mapping("mapped-v8", artifact_id="aid", generation=8) + store = _StoreStub(runtime, _PipelineStub(payload)) + + selection_v7 = Artifact( + store_ref=_store_ref(store), + key="mapped-v7", + )._resolve_realization_selection() + selection_v8 = Artifact( + store_ref=_store_ref(store), + 
key="mapped-v8", + )._resolve_realization_selection() + + assert selection_v7.generation_hint == 7 + assert selection_v8.generation_hint == 8 + assert selection_v7.source_selection_digest != selection_v8.source_selection_digest + + def test_describe_uses_cached_generation_without_fetch(): canonical_bytes, payload = _build_payload({"foo": torch.ones(1)}) runtime = _RuntimeStub(_ClientStub(canonical_bytes)) diff --git a/tests/python/api/test_artifact_tensor_subset.py b/tests/python/api/test_artifact_tensor_subset.py index 71cb8578..1725985b 100644 --- a/tests/python/api/test_artifact_tensor_subset.py +++ b/tests/python/api/test_artifact_tensor_subset.py @@ -11,8 +11,8 @@ from tensorcast.api.store.artifact import Artifact from tensorcast.api.store.cache import ArtifactCache from tensorcast.api.store.common import canonical_index_from_bytes -from tensorcast.api.store.types import ArtifactError, StoreOptions from tensorcast.api.store.retry import build_retry_policies +from tensorcast.api.store.types import ArtifactError, StoreOptions def _canonical_index_bytes() -> bytes: diff --git a/tests/python/api/test_mapped_binding.py b/tests/python/api/test_mapped_binding.py index 1a10bd1c..2c906586 100644 --- a/tests/python/api/test_mapped_binding.py +++ b/tests/python/api/test_mapped_binding.py @@ -819,7 +819,7 @@ def test_bind_into_mapping_propagates_collective_hint_in_operation_id( import tensorcast.api._device as device_mod artifact_mod = importlib.import_module("tensorcast.api.store.artifact") - store_mod = importlib.import_module("tensorcast.api.store.__init__") + store_mod = importlib.import_module("tensorcast.api.store") monkeypatch.setattr(device_mod, "device_uuid_for", lambda device_id: "gpu-0") monkeypatch.setattr(artifact_mod, "device_uuid_for", lambda device_id: "gpu-0") diff --git a/tests/python/api/test_plan_spec.py b/tests/python/api/test_plan_spec.py index e07fc582..021d563d 100644 --- a/tests/python/api/test_plan_spec.py +++ b/tests/python/api/test_plan_spec.py 
@@ -25,7 +25,7 @@ from tensorcast.api.store import ( BuilderMode, RepresentationPublishSpec, - ServingBuildIntent, + RuntimeArtifactBuildIntent, build_pure_transform_publication_bundle_from_registered_artifact, ) from tensorcast.api.store.artifact import Artifact @@ -44,13 +44,64 @@ from tensorcast.proto.common.v1 import common_pb2 from tensorcast.proto.node_agent.v1 import node_agent_pb2 from tensorcast.proto.plan.v1 import plan_pb2 -from tensorcast.types import build_serving_manifest_ref +from tensorcast.types import ( + RealizationTarget, + RuntimeBindingMemberRef, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeTopologyRef, + build_serving_manifest_ref, +) def _canonical_index_bytes() -> bytes: return b'{"w":[0,4,[1],[1],"torch.float32",0]}' +def _realization_target() -> RealizationTarget: + topology = RuntimeTopologyRef(schema_topology_digest="topology-schema") + member = RuntimeBindingMemberRef( + member_id="member-0", + member_index=0, + member_count=1, + group_id="group-1", + ) + source = RuntimeBindingSourceRef( + source_kind="checkpoint_artifact", + artifact_selection_digest="selection-digest", + source_artifact_ref="mi2:source", + source_schema_hash="source-schema", + ) + resolved_layout = RuntimeBindingResolvedLayout( + binding_layout_id="layout-1", + source=source, + source_reuse=RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", + representation_contract_hash="repr-contract", + ), + topology=topology, + member=member, + target_layout=b"target-layout", + target_index_bytes=b"target-index", + target_layout_hash="target-layout-hash", + tensor_schema_hash="tensor-schema", + spec_digest="spec-digest", + source_schema_hash="source-schema", + ) + return RealizationTarget( + runtime="vllm", + device="cuda:0", + device_uuid="GPU-0", + source=source, + topology=topology, + member=member, + model_config_digest="model-config", + runtime_build_digest="serving-build", + 
resolved_layout=resolved_layout, + ) + + def _sample_publish_manifest() -> PublishManifest: artifact_manifest = ManifestResult.from_artifact_selections( engine_request_id="rid-transfer", @@ -148,6 +199,33 @@ def test_plan_view_selection_hash_populated() -> None: assert list(selection.tensor_names) == ["w"] +def test_plan_prefetch_accepts_realization_target() -> None: + store = _StoreStub() + canonical_bytes = _canonical_index_bytes() + artifact = Artifact( + store_ref=_store_ref(store), + artifact_id="mi2:target-test", + canonical_index_bytes=canonical_bytes, + canonical_index=canonical_index_from_bytes(canonical_bytes), + ) + target = _realization_target() + plan = Plan(CallContext(request_id="req-target")) + worker = Worker( + worker_id="worker-target", + daemon_address="127.0.0.1:50051", + daemon_id="daemon-target", + ) + + ref = plan.on_worker(worker).prefetch(artifact, target=target) + + spec = plan.to_spec() + assert spec.steps[0].step_id == ref.step_id + prefetch = spec.steps[0].action.prefetch + assert prefetch.HasField("serving_binding_target") + assert prefetch.serving_binding_target.runtime == "vllm" + assert prefetch.serving_binding_target.member.member_id == "member-0" + + def test_plan_publish_serializes_canonical_action() -> None: ctx = CallContext(request_id="req-cache", idempotency_key="idem-cache") plan = Plan(ctx) @@ -233,7 +311,7 @@ def test_plan_transform_register_pure_transform_builds_repo_owned_spec() -> None step_ref = plan.on_instance(inst).transform_register_pure_transform( artifact, - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v7", @@ -681,7 +759,7 @@ def test_plan_result_decodes_pure_transform_publication_result() -> None: lease=None, ) bundle = build_pure_transform_publication_bundle_from_registered_artifact( - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( 
representation_contract_hash="bafkrepresentation", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", diff --git a/tests/python/api/test_prefetch_operation.py b/tests/python/api/test_prefetch_operation.py index ffc3ea22..24667ea1 100644 --- a/tests/python/api/test_prefetch_operation.py +++ b/tests/python/api/test_prefetch_operation.py @@ -29,16 +29,16 @@ BindingValueRef, BindingValueVerificationState, GroupRealizationAcquireRef, - PrefetchedServingBinding, - PrefetchedServingBindingSet, - ServingBindingMemberRef, - ServingBindingResolvedLayout, - ServingBindingSetTarget, - ServingBindingSourceMemberRef, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingTarget, - ServingTopologyRef, + PrefetchHandoff, + PrefetchHandoffSet, + RealizationTarget, + RealizationTargetSet, + RuntimeBindingMemberRef, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeTopologyRef, ) @@ -85,12 +85,12 @@ def release_replica(self, ticket: store_daemon_pb2.ReplicaTicket): def _prefetched_binding( self, - target: ServingBindingTarget, + target: RealizationTarget, *, readiness: object, staged_value: bool = False, wait_for_publish: bool = False, - ) -> PrefetchedServingBinding: + ) -> PrefetchHandoff: device_uuid = str(target.device_uuid or "GPU-0") suffix = target.member.member_index + 1 value_id = f"staged-value-{suffix}" if staged_value else f"value-{suffix}" @@ -111,7 +111,7 @@ def _prefetched_binding( scope_digest=f"scope-digest-{suffix}", expires_at_ms=1234, ) - return PrefetchedServingBinding( + return PrefetchHandoff( local_serving_ref=f"binding-local:binding-{suffix}:{value_id}", binding_value_ref=binding_ref, daemon_id="daemon-1", @@ -141,12 +141,12 @@ def _prefetched_binding( def prefetch_serving_binding(self, **kwargs): self.prefetch_binding_calls.append(kwargs) - target = cast(ServingBindingTarget | ServingBindingSetTarget, kwargs["target"]) + target = 
cast(RealizationTarget | RealizationTargetSet, kwargs["target"]) operation_id = str(kwargs.get("operation_id") or "prefetch-binding-op") readiness = kwargs["requested_readiness"] - if isinstance(target, ServingBindingSetTarget): - staged_members = target.source.source_kind == "serving_artifact_set" - result = PrefetchedServingBindingSet( + if isinstance(target, RealizationTargetSet): + staged_members = target.source.source_kind == "runtime_artifact_set" + result = PrefetchHandoffSet( runtime=target.runtime, topology=target.topology, group_id=target.group_id, @@ -224,10 +224,10 @@ def _store_ref(store: _Store) -> Any: return cast(Any, weakref.ref(store)) -def _serving_target( +def _realization_target( *, - topology: ServingTopologyRef | None = None, - source: ServingBindingSourceRef | None = None, + topology: RuntimeTopologyRef | None = None, + source: RuntimeBindingSourceRef | None = None, member_id: str = "member-0", member_index: int = 0, member_count: int = 1, @@ -238,25 +238,25 @@ def _serving_target( target_index_bytes: bytes = b"target-index", target_layout_hash: str = "target-layout-hash", spec_digest: str = "spec-digest", -) -> ServingBindingTarget: - topology = topology or ServingTopologyRef(schema_topology_digest="topology-schema") - member = ServingBindingMemberRef( +) -> RealizationTarget: + topology = topology or RuntimeTopologyRef(schema_topology_digest="topology-schema") + member = RuntimeBindingMemberRef( member_id=member_id, member_index=member_index, member_count=member_count, group_id="group-1", ) - source = source or ServingBindingSourceRef( + source = source or RuntimeBindingSourceRef( source_kind="checkpoint_artifact", artifact_selection_digest="selection-digest", source_artifact_ref="mi2:source", source_schema_hash="source-schema", ) - source_reuse = ServingBindingSourceReuseDecision( - mode="checkpoint_to_serving", + source_reuse = RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", 
representation_contract_hash="repr-contract", ) - resolved_layout = ServingBindingResolvedLayout( + resolved_layout = RuntimeBindingResolvedLayout( binding_layout_id=binding_layout_id, source=source, source_reuse=source_reuse, @@ -271,7 +271,7 @@ def _serving_target( copy_plan_bytes=b"copy-plan", dst_specs_bytes=b"dst-specs", ) - return ServingBindingTarget( + return RealizationTarget( runtime="vllm", device=device, device_uuid=device_uuid, @@ -279,20 +279,20 @@ def _serving_target( topology=topology, member=member, model_config_digest="model-config", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", resolved_layout=resolved_layout, ) -def _serving_target_set() -> ServingBindingSetTarget: - topology = ServingTopologyRef(schema_topology_digest="topology-schema") - source = ServingBindingSourceRef( +def _realization_target_set() -> RealizationTargetSet: + topology = RuntimeTopologyRef(schema_topology_digest="topology-schema") + source = RuntimeBindingSourceRef( source_kind="checkpoint_artifact", artifact_selection_digest="selection-digest", source_artifact_ref="mi2:source", source_schema_hash="source-schema", ) - target_0 = _serving_target( + target_0 = _realization_target( topology=topology, source=source, member_id="member-0", @@ -306,7 +306,7 @@ def _serving_target_set() -> ServingBindingSetTarget: target_layout_hash="target-layout-hash-0", spec_digest="spec-digest-0", ) - target_1 = _serving_target( + target_1 = _realization_target( topology=topology, source=source, member_id="member-1", @@ -320,7 +320,7 @@ def _serving_target_set() -> ServingBindingSetTarget: target_layout_hash="target-layout-hash-1", spec_digest="spec-digest-1", ) - return ServingBindingSetTarget( + return RealizationTargetSet( runtime="vllm", source=source, topology=topology, @@ -329,37 +329,37 @@ def _serving_target_set() -> ServingBindingSetTarget: ) -def _serving_artifact_set_target_set() -> ServingBindingSetTarget: - topology = 
ServingTopologyRef(schema_topology_digest="topology-schema") - member_0 = ServingBindingMemberRef( +def _serving_artifact_realization_target_set() -> RealizationTargetSet: + topology = RuntimeTopologyRef(schema_topology_digest="topology-schema") + member_0 = RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=2, group_id="group-1", ) - member_1 = ServingBindingMemberRef( + member_1 = RuntimeBindingMemberRef( member_id="member-1", member_index=1, member_count=2, group_id="group-1", ) - source = ServingBindingSourceRef( - source_kind="serving_artifact_set", + source = RuntimeBindingSourceRef( + source_kind="runtime_artifact_set", artifact_selection_digest="artifact-set-selection", source_schema_hash="source-schema", topology=topology, members=( - ServingBindingSourceMemberRef( + RuntimeBindingSourceMemberRef( member=member_0, artifact_ref="mi2:serving-member-0", ), - ServingBindingSourceMemberRef( + RuntimeBindingSourceMemberRef( member=member_1, artifact_ref="mi2:serving-member-1", ), ), ) - target_0 = _serving_target( + target_0 = _realization_target( topology=topology, source=source, member_id=member_0.member_id, @@ -373,7 +373,7 @@ def _serving_artifact_set_target_set() -> ServingBindingSetTarget: target_layout_hash="target-layout-hash-0", spec_digest="spec-digest-0", ) - target_1 = _serving_target( + target_1 = _realization_target( topology=topology, source=source, member_id=member_1.member_id, @@ -387,7 +387,7 @@ def _serving_artifact_set_target_set() -> ServingBindingSetTarget: target_layout_hash="target-layout-hash-1", spec_digest="spec-digest-1", ) - return ServingBindingSetTarget( + return RealizationTargetSet( runtime="vllm", source=source, topology=topology, @@ -518,11 +518,11 @@ def test_realize_async_prefetch_targets_emit_report_shaped_profile_events( ) _ = replica_op.result(timeout_s=1.0) retained_op = artifact.realize_async( - ArtifactRealizationSpec.retained_binding(target=_serving_target()) + 
ArtifactRealizationSpec.retained_binding(target=_realization_target()) ) _ = retained_op.result(timeout_s=1.0) target_set_op = artifact.realize_async( - ArtifactRealizationSpec.target_set(target=_serving_target_set()) + ArtifactRealizationSpec.target_set(target=_realization_target_set()) ) _ = target_set_op.result(timeout_s=1.0) @@ -551,7 +551,7 @@ def test_realize_async_prefetch_targets_emit_report_shaped_profile_events( def test_realize_async_retained_binding_completed_operation_status_and_cancel() -> None: store = _Store() artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid") - target = _serving_target() + target = _realization_target() op = artifact.realize_async(ArtifactRealizationSpec.retained_binding(target=target)) @@ -559,18 +559,18 @@ def test_realize_async_retained_binding_completed_operation_status_and_cancel() assert op.done() is True assert op.cancel() is False result = op.result(timeout_s=1.0) - assert isinstance(result, PrefetchedServingBinding) + assert isinstance(result, PrefetchHandoff) def test_realize_async_retained_binding_attaches_report_to_result() -> None: store = _Store() artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid") - target = _serving_target() + target = _realization_target() op = artifact.realize_async(ArtifactRealizationSpec.retained_binding(target=target)) result = op.result(timeout_s=1.0) - assert isinstance(result, PrefetchedServingBinding) + assert isinstance(result, PrefetchHandoff) assert store._runtime.ensure_client().prefetch_binding_calls assert result.report is not None report = cast(ArtifactRealizationReport, result.report) @@ -598,19 +598,19 @@ def test_realize_async_retained_binding_attaches_report_to_result() -> None: assert retained.binding_layout_id == "layout-1" assert retained.binding_value_id == "value-1" assert retained.reservation_bytes == 1024 - assert retained.readiness == "serving_local_ready" + assert retained.readiness == "runtime_local_ready" assert 
retained.verification_state == "local_only" def test_realize_async_retained_binding_set_attaches_target_set_report() -> None: store = _Store() artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid") - target = _serving_target_set() + target = _realization_target_set() op = artifact.realize_async(ArtifactRealizationSpec.target_set(target=target)) result = op.result(timeout_s=1.0) - assert isinstance(result, PrefetchedServingBindingSet) + assert isinstance(result, PrefetchHandoffSet) assert store._runtime.ensure_client().prefetch_binding_calls assert result.report is not None report = cast(ArtifactRealizationReport, result.report) @@ -668,22 +668,22 @@ def test_retained_binding_realization_rejects_target_set_bypass() -> None: with pytest.raises( tc.ArtifactError, - match="ServingBindingSetTarget requires target_set realization", + match="RealizationTargetSet requires target_set realization", ): artifact.realize_async( - ArtifactRealizationSpec.retained_binding(target=_serving_target_set()) + ArtifactRealizationSpec.retained_binding(target=_realization_target_set()) ) def test_realize_async_target_set_per_part_selection_reports_group_lifecycle() -> None: store = _Store() artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid") - target = _serving_artifact_set_target_set() + target = _serving_artifact_realization_target_set() op = artifact.realize_async(ArtifactRealizationSpec.target_set(target=target)) result = op.result(timeout_s=1.0) - assert isinstance(result, PrefetchedServingBindingSet) + assert isinstance(result, PrefetchHandoffSet) assert result.report is not None report = cast(ArtifactRealizationReport, result.report) assert report.target_kind == "target_set" @@ -692,7 +692,7 @@ def test_realize_async_target_set_per_part_selection_reports_group_lifecycle() - "release_group_staged_acquire", ) assert report.target_set is not None - assert report.target_set.source_kind == "serving_artifact_set" + assert report.target_set.source_kind == 
"runtime_artifact_set" assert report.target_set.source_selection_mode == "per_part_selection" assert report.target_set.publish_barrier is True assert report.target_set.group_realization_transaction_ids == ("txn-1",) @@ -729,12 +729,12 @@ def test_realize_async_target_set_per_part_selection_reports_group_lifecycle() - def test_prefetch_target_set_uses_target_set_realization_spec() -> None: store = _Store() artifact = Artifact(store_ref=_store_ref(store), artifact_id="aid") - target = _serving_target_set() + target = _realization_target_set() op = artifact.prefetch(target=target) result = op.result(timeout_s=1.0) - assert isinstance(result, PrefetchedServingBindingSet) + assert isinstance(result, PrefetchHandoffSet) assert store._runtime.ensure_client().prefetch_binding_calls[0]["target"] == target report = cast(ArtifactRealizationReport, result.report) assert report.target_kind == "target_set" diff --git a/tests/python/api/test_public_surface.py b/tests/python/api/test_public_surface.py index aa2624c4..0b0cf585 100644 --- a/tests/python/api/test_public_surface.py +++ b/tests/python/api/test_public_surface.py @@ -2,10 +2,92 @@ from __future__ import annotations +import importlib +import importlib.util import inspect +from pathlib import Path import tensorcast as tc -from tensorcast.api.store import Store +import tensorcast.artifact_runtime.diagnostics as tc_runtime_diagnostics +import tensorcast.artifact_runtime.readiness as tc_runtime_readiness +from tensorcast.api.store import ( + ArtifactRealizationHandle, + ArtifactRealizationReport, + ArtifactRealizationSpec, + PrefetchHandoff, + PrefetchHandoffMemberFailure, + PrefetchHandoffSet, + RealizationTarget, + RealizationTargetSet, + RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, + RuntimeArtifactPolicy, + RuntimeBindingMemberRef, + RuntimeBindingReadiness, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceKind, + RuntimeBindingSourceMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, 
+ RuntimeBindingSourceReuseMode, + RuntimeRealizationSpecCacheEntry, + RuntimeTopologyRef, + Store, +) +from tensorcast.artifact_runtime.attachment import ( + RuntimeAttachment, + RuntimeBindingState, +) +from tensorcast.artifact_runtime.config import ( + RuntimeArtifactLocator, + RuntimeStartPlanError, + TensorCastRuntimeConfig, + plan_runtime_start, +) +from tensorcast.artifact_runtime.diagnostics import RuntimeRealizationReport +from tensorcast.artifact_runtime.host import ( + RuntimeAdmissionDecision, + RuntimeAdmissionPolicy, + RuntimeAdmissionRequest, + RuntimeHostCapabilities, + RuntimePlacement, + RuntimeProfile, + RuntimeTensorView, +) +from tensorcast.artifact_runtime.intent import RuntimeRequestContext +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.policy import RuntimePolicy +from tensorcast.artifact_runtime.publication.actions import ( + RuntimeReplicaPublicationSettings, + project_runtime_replica_publication_state, + publish_runtime_replica, + retire_runtime_replica, + runtime_replica_publication_settings, +) +from tensorcast.artifact_runtime.reload import ( + merge_runtime_reload_extra_config, + normalize_runtime_reload_request_payload, + reload_runtime_attachment, +) +from tensorcast.artifact_runtime.state import ( + ModelAttributeNames, + ModelAttributeRuntimeState, + OneShotRuntimeHook, +) +from tensorcast.artifact_runtime.view import ( + BindingValueRefProjection, + RuntimeEndpointProjection, + RuntimeWorkerView, + SourceSelectionProjection, + WeightVersionProjection, + aggregate_runtime_view_outputs, +) +from tensorcast.retained_realization import ( + RetainedRealizationClaim, + RetainedRealizationExpectedDigests, + retained_realization_claim_extra_from_handoff, + retained_realization_claim_extra_json_from_handoff, +) def test_tensorcast_exports_artifact_helpers() -> None: @@ -13,6 +95,267 @@ def test_tensorcast_exports_artifact_helpers() -> None: assert callable(tc.artifact) assert hasattr(tc, 
"artifact_async") assert callable(tc.artifact_async) + assert tc.ArtifactRealizationSpec is ArtifactRealizationSpec + assert tc.ArtifactRealizationHandle is ArtifactRealizationHandle + assert tc.ArtifactRealizationReport is ArtifactRealizationReport + assert "ArtifactRealizationSpec" in tc.__all__ + assert tc.RetainedRealizationClaim is RetainedRealizationClaim + assert "RetainedRealizationClaim" in tc.__all__ + assert tc.RetainedRealizationExpectedDigests is RetainedRealizationExpectedDigests + assert "RetainedRealizationExpectedDigests" in tc.__all__ + assert callable(tc.parse_retained_realization_claim) + assert ( + tc.retained_realization_claim_extra_from_handoff + is retained_realization_claim_extra_from_handoff + ) + assert "retained_realization_claim_extra_from_handoff" in tc.__all__ + assert ( + tc.retained_realization_claim_extra_json_from_handoff + is retained_realization_claim_extra_json_from_handoff + ) + assert "retained_realization_claim_extra_json_from_handoff" in tc.__all__ + assert tc.RuntimeAttachment is RuntimeAttachment + assert tc.RuntimeBindingState is RuntimeBindingState + assert tc.RuntimeRequestContext is RuntimeRequestContext + assert tc.RuntimeAdmissionDecision is RuntimeAdmissionDecision + assert tc.RuntimeAdmissionPolicy is RuntimeAdmissionPolicy + assert tc.RuntimeAdmissionRequest is RuntimeAdmissionRequest + assert tc.RuntimeHostCapabilities is RuntimeHostCapabilities + assert tc.RuntimePlacement is RuntimePlacement + assert tc.RuntimeProfile is RuntimeProfile + assert tc.RuntimeTensorView is RuntimeTensorView + assert tc.ArtifactLocator is ArtifactLocator + assert tc.RuntimeArtifactLocator is RuntimeArtifactLocator + assert tc.RuntimePolicy is RuntimePolicy + assert tc.RuntimeRealizationReport is RuntimeRealizationReport + assert tc.RuntimeArtifactBuildIntent is RuntimeArtifactBuildIntent + assert tc.RuntimeArtifactManifest is RuntimeArtifactManifest + assert tc.RuntimeArtifactPolicy is RuntimeArtifactPolicy + assert 
tc.RealizationTarget is RealizationTarget + assert tc.RealizationTargetSet is RealizationTargetSet + assert tc.RuntimeBindingMemberRef is RuntimeBindingMemberRef + assert tc.RuntimeBindingReadiness is RuntimeBindingReadiness + assert tc.RuntimeBindingResolvedLayout is RuntimeBindingResolvedLayout + assert tc.RuntimeBindingSourceKind is RuntimeBindingSourceKind + assert tc.RuntimeBindingSourceMemberRef is RuntimeBindingSourceMemberRef + assert tc.RuntimeBindingSourceRef is RuntimeBindingSourceRef + assert tc.RuntimeBindingSourceReuseDecision is RuntimeBindingSourceReuseDecision + assert tc.RuntimeBindingSourceReuseMode is RuntimeBindingSourceReuseMode + assert tc.RuntimeRealizationSpecCacheEntry is RuntimeRealizationSpecCacheEntry + assert tc.RuntimeTopologyRef is RuntimeTopologyRef + assert tc.PrefetchHandoff is PrefetchHandoff + assert tc.PrefetchHandoffMemberFailure is PrefetchHandoffMemberFailure + assert tc.PrefetchHandoffSet is PrefetchHandoffSet + for removed_name in ( + "ServingBindingTarget", + "ServingBindingSetTarget", + "PrefetchedServingBinding", + "PrefetchedServingBindingSet", + "ServingBuildIntent", + "ServingArtifactManifest", + "ServingRuntimePolicy", + "ServingRealizationReport", + "ServingBindingMemberRef", + "ServingBindingReadiness", + "ServingBindingResolvedLayout", + "ServingBindingResolvedSpecCacheEntry", + "ServingBindingSourceKind", + "ServingBindingSourceMemberRef", + "ServingBindingSourceRef", + "ServingBindingSourceReuseDecision", + "ServingBindingSourceReuseMode", + "ServingTopologyRef", + "ServingAdmissionFacts", + "ServingPublicationSubject", + "ServingSupportLevel", + "PreparedServingRegistration", + "RegisteredServingPublication", + "CapabilityDirectoryClient", + "CapabilityDirectoryOptions", + "RegisteredRuntimeArtifactPublication", + "PreparedRuntimeArtifactRegistration", + "RuntimePublicationSubject", + "RuntimeAdmissionFacts", + "RuntimeSupportLevel", + "build_serving_publication_bundle", + 
"build_serving_publication_bundle_from_registered_artifact", + "build_runtime_artifact_publication_bundle", + "build_runtime_artifact_publication_bundle_from_registered_artifact", + "build_serving_manifest_ref", + "SERVING_BUILD_DIGEST_VERSION", + "compute_serving_tensor_schema_hash", + "compute_runtime_artifact_tensor_schema_hash", + "count_canonical_serving_tensors", + "count_canonical_runtime_tensors", + "prepare_pure_transform_serving_registration", + "prepare_binding_finalize_serving_registration", + "prepare_serving_registration", + "prepare_pure_transform_runtime_registration", + "prepare_binding_finalize_runtime_registration", + "prepare_runtime_artifact_registration", + "parse_serving_manifest_ref", + "plan_serving_binding_source_reuse", + "retained_realization_claim_extra_from_prefetched_binding", + "retained_realization_claim_extra_json", + ): + assert removed_name not in tc.__all__ + assert not hasattr(tc, removed_name) + assert tc.RuntimeStartPlanError is RuntimeStartPlanError + assert tc.TensorCastRuntimeConfig is TensorCastRuntimeConfig + assert tc.plan_runtime_start is plan_runtime_start + assert tc.runtime is importlib.import_module("tensorcast.runtime") + assert "runtime" in tc.__all__ + assert "serving" not in tc.__all__ + assert not hasattr(tc, "serving") + assert "RuntimeAttachment" in tc.__all__ + assert "RuntimeBindingState" in tc.__all__ + assert "RuntimeRequestContext" in tc.__all__ + assert "RuntimeAdmissionDecision" in tc.__all__ + assert "RuntimeAdmissionPolicy" in tc.__all__ + assert "RuntimeAdmissionRequest" in tc.__all__ + assert "RuntimeHostCapabilities" in tc.__all__ + assert "RuntimePlacement" in tc.__all__ + assert "RuntimeProfile" in tc.__all__ + assert "RuntimeTensorView" in tc.__all__ + assert "ArtifactLocator" in tc.__all__ + assert "RuntimeArtifactLocator" in tc.__all__ + assert "RuntimePolicy" in tc.__all__ + assert "RuntimeRealizationReport" in tc.__all__ + + assert "RuntimeArtifactBuildIntent" in tc.__all__ + assert 
"RuntimeArtifactManifest" in tc.__all__ + assert "RuntimeArtifactPolicy" in tc.__all__ + assert "RealizationTarget" in tc.__all__ + assert "RealizationTargetSet" in tc.__all__ + assert "RuntimeBindingMemberRef" in tc.__all__ + assert "RuntimeBindingReadiness" in tc.__all__ + assert "RuntimeBindingResolvedLayout" in tc.__all__ + assert "RuntimeBindingSourceKind" in tc.__all__ + assert "RuntimeBindingSourceMemberRef" in tc.__all__ + assert "RuntimeBindingSourceRef" in tc.__all__ + assert "RuntimeBindingSourceReuseDecision" in tc.__all__ + assert "RuntimeBindingSourceReuseMode" in tc.__all__ + assert "RuntimeRealizationSpecCacheEntry" in tc.__all__ + assert "RuntimeTopologyRef" in tc.__all__ + assert "PrefetchHandoff" in tc.__all__ + assert "PrefetchHandoffMemberFailure" in tc.__all__ + assert "PrefetchHandoffSet" in tc.__all__ + assert "RuntimeStartPlanError" in tc.__all__ + assert "TensorCastRuntimeConfig" in tc.__all__ + assert "plan_runtime_start" in tc.__all__ + assert "ServingRealizationReport" not in tc_runtime_diagnostics.__all__ + assert not hasattr(tc_runtime_diagnostics, "ServingRealizationReport") + for removed_module in ( + "tensorcast.serving", + "tensorcast.serving.runtime", + "tensorcast.serving.diagnostics", + "tensorcast.serving.config", + "tensorcast.serving.contract", + "tensorcast.serving.hosts", + "tensorcast.serving.readiness", + "tensorcast.serving.runtime_attachment", + "tensorcast.serving.runtime_config", + "tensorcast.serving.runtime_contract", + "tensorcast.serving.runtime_intent", + "tensorcast.serving.runtime_view", + "tensorcast.serving.policy", + "tensorcast.serving.session", + "tensorcast.serving.source_catalog", + "tensorcast.serving.state", + "tensorcast.serving._runtime_impl", + "tensorcast.serving._runtime_impl.lifecycle", + "tensorcast.serving.admin", + "tensorcast.serving.artifact_manifest", + "tensorcast.serving.binding_runtime", + "tensorcast.serving.builder", + "tensorcast.serving.builder.tensor_parity", + 
"tensorcast.serving.dto", + "tensorcast.serving.errors", + "tensorcast.serving.local_ready", + "tensorcast.serving.replica_publication", + "tensorcast.serving.resolver", + "tensorcast.serving.retained_binding", + "tensorcast.serving.testing", + ): + try: + spec = importlib.util.find_spec(removed_module) + except ModuleNotFoundError: + spec = None + assert spec is None + assert tc.ModelAttributeNames is ModelAttributeNames + assert tc.ModelAttributeRuntimeState is ModelAttributeRuntimeState + assert tc.OneShotRuntimeHook is OneShotRuntimeHook + assert "ModelAttributeNames" in tc.__all__ + assert "ModelAttributeRuntimeState" in tc.__all__ + assert "OneShotRuntimeHook" in tc.__all__ + assert tc.BindingValueRefProjection is BindingValueRefProjection + assert tc.RuntimeEndpointProjection is RuntimeEndpointProjection + assert tc.RuntimeWorkerView is RuntimeWorkerView + assert tc.SourceSelectionProjection is SourceSelectionProjection + assert tc.WeightVersionProjection is WeightVersionProjection + assert "BindingValueRefProjection" in tc.__all__ + assert "RuntimeEndpointProjection" in tc.__all__ + assert "RuntimeWorkerView" in tc.__all__ + assert "SourceSelectionProjection" in tc.__all__ + assert "WeightVersionProjection" in tc.__all__ + assert tc.aggregate_runtime_view_outputs is aggregate_runtime_view_outputs + assert "aggregate_runtime_view_outputs" in tc.__all__ + assert tc.publish_runtime_replica is publish_runtime_replica + assert tc.project_runtime_replica_publication_state is ( + project_runtime_replica_publication_state + ) + assert tc.retire_runtime_replica is retire_runtime_replica + assert "publish_runtime_replica" in tc.__all__ + assert "retire_runtime_replica" in tc.__all__ + assert tc.RuntimeReplicaPublicationSettings is (RuntimeReplicaPublicationSettings) + assert tc.runtime_replica_publication_settings is ( + runtime_replica_publication_settings + ) + assert "RuntimeReplicaPublicationSettings" in tc.__all__ + assert "runtime_replica_publication_settings" 
in tc.__all__ + assert tc.reload_runtime_attachment is reload_runtime_attachment + assert "reload_runtime_attachment" in tc.__all__ + assert tc.merge_runtime_reload_extra_config is merge_runtime_reload_extra_config + assert tc.normalize_runtime_reload_request_payload is ( + normalize_runtime_reload_request_payload + ) + assert "merge_runtime_reload_extra_config" in tc.__all__ + assert "normalize_runtime_reload_request_payload" in tc.__all__ + assert callable(tc_runtime_diagnostics.binding_layout_tensor_count) + assert tc_runtime_readiness.ReadinessInventoryAdmissionPolicy is not None + + +def test_public_sdk_surface_does_not_open_global_store_channels() -> None: + forbidden = ( + "GlobalStoreCompositeStub", + "tensorcast.global_store", + "global_store_pb2", + "global_store_pb2_grpc", + "tensorcast.proto.global_store", + "grpc.insecure_channel", + "grpc.secure_channel", + "grpc.aio.insecure_channel", + "grpc.aio.secure_channel", + ) + roots = ( + Path("tensorcast/__init__.py"), + Path("tensorcast/api"), + Path("tensorcast/artifact_runtime"), + Path("tensorcast/retained_realization.py"), + ) + checked = [ + path + for root in roots + for path in ([root] if root.is_file() else sorted(root.rglob("*.py"))) + ] + offenders = [ + f"{path}:{token}" + for path in checked + for token in forbidden + if token in path.read_text(encoding="utf-8") + ] + + assert offenders == [] def test_tensorcast_exports_programmable_primitives() -> None: diff --git a/tests/python/api/test_realization_kernel.py b/tests/python/api/test_realization_kernel.py index c72c6b9a..4a3da07e 100644 --- a/tests/python/api/test_realization_kernel.py +++ b/tests/python/api/test_realization_kernel.py @@ -35,6 +35,7 @@ envelope_for_target_set, envelope_for_tensor_dict, lifecycle_plan_for_envelope, + materialization_source_label, model_runtime_report_for, mounted_source_target_digest, publishability_report_for, @@ -65,7 +66,7 @@ BindingReservationCapability, BindingValueRef, GroupRealizationAcquireRef, - 
ServingBindingMemberRef, + RuntimeBindingMemberRef, ) @@ -95,6 +96,21 @@ def _canonical_index_bytes() -> bytes: ) +def test_materialization_source_label_uses_realization_report_vocabulary() -> None: + assert ( + materialization_source_label( + store_daemon_pb2.MATERIALIZATION_SOURCE_LOCAL_REPLICA + ) + == "local_replica" + ) + assert materialization_source_label( + store_daemon_pb2.MATERIALIZATION_SOURCE_P2P + ) == ("p2p") + assert materialization_source_label( + store_daemon_pb2.MATERIALIZATION_SOURCE_DISK + ) == ("disk") + + def test_resolve_artifact_selection_subset_digest_is_stable() -> None: index_bytes = _canonical_index_bytes() @@ -563,7 +579,7 @@ def retained_state( binding_value_id=f"value-{member_index}", seal_generation=1, ) - member = ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id=f"member-{member_index}", member_index=member_index, member_count=2, @@ -588,7 +604,7 @@ def retained_state( daemon_session_id="session-1", device_uuid=f"GPU-{member_index}", reservation_bytes=reservation_bytes, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", staged_value=staged_value, group_realization_acquire=GroupRealizationAcquireRef( @@ -1267,7 +1283,7 @@ def test_model_runtime_report_wraps_runtime_attachment_report() -> None: artifact_id="mi2:test:serving", canonical_index_bytes=_canonical_index_bytes(), tensor_names=("a",), - artifact_profile="serving_artifact", + artifact_profile="runtime_artifact", authority_scope="daemon_mediated_runtime_attachment", ) target_plan = RealizationTargetPlan( @@ -1350,7 +1366,7 @@ def test_publication_spec_and_handle_facade_own_release_contract() -> None: publish_replica=lambda: None, size_bytes=1024, ) - spec = ArtifactRealizationSpec.publication(target=projection, timeout_s=5) + spec = ArtifactRealizationSpec._publication(target=projection, timeout_s=5) target_plan = RealizationTargetPlan( kind=spec.target_kind, target_layout_digest="layout-1", @@ -1502,6 
+1518,7 @@ def test_binding_envelope_and_report_capture_identity_diagnostics() -> None: target_plan = RealizationTargetPlan( kind="binding_owned", device="cuda:0", + target_layout_digest="binding-layout:bl1:test", binding_layout_id="bl1:test", ) @@ -1543,6 +1560,9 @@ def test_binding_envelope_and_report_capture_identity_diagnostics() -> None: assert report.binding.binding_layout_id == "bl1:test" assert report.binding.binding_value_id == "value-1" assert report.binding.value_state == "current" + assert report.view_subset_hash == selection.view_subset_hash.hex() + assert report.logical_layout_hash == selection.logical_layout_hash.hex() + assert report.selection_hash == selection.selection_hash.hex() assert report.binding.publication_eligible is True assert report.binding.publish_requested is True assert report.binding.published is True @@ -1627,6 +1647,8 @@ def test_binding_envelope_and_report_capture_identity_diagnostics() -> None: assert execution_dict["planner_reject_reason_buckets"] == {"not_collective": 2} profile_payload = artifact_realization_profile_payload(report) + assert profile_payload["logical_layout_hash"] == selection.logical_layout_hash.hex() + assert profile_payload["selection_hash"] == selection.selection_hash.hex() assert profile_payload["execution_actual_executor_path"] == "mixed_collective" assert profile_payload["execution_residual_bytes"] == 4 assert profile_payload["execution_plan_kind"] == "collective_first_mixed" @@ -1710,6 +1732,7 @@ def test_local_ready_pending_verification_report_records_admission_state() -> No ) target_plan = RealizationTargetPlan( kind="binding_owned", + target_layout_digest="binding-layout:layout-local-ready", binding_layout_id="layout-local-ready", ) envelope = envelope_for_binding(binding, target_kind="binding_owned") @@ -1744,7 +1767,7 @@ def test_runtime_attachment_envelope_and_report_capture_release_contract() -> No artifact_id="mi2:test:serving", canonical_index_bytes=_canonical_index_bytes(), 
tensor_names=("a",), - artifact_profile="serving_artifact", + artifact_profile="runtime_artifact", authority_scope="daemon_mediated_runtime_attachment", ) target_plan = RealizationTargetPlan( @@ -1832,7 +1855,7 @@ def _target_set_retained_member(member_index: int) -> RealizationRetainedBinding reservation_bytes=1024, reservation_capability_id=f"cap-{suffix}", reservation_scope_digest=f"scope-{suffix}", - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", ) @@ -1862,7 +1885,7 @@ def test_retained_binding_report_captures_capability_expiry() -> None: daemon_session_id="session-1", device_uuid="GPU-0", reservation_bytes=4096, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", expires_at_ms=4_102_444_800_000, ) @@ -1892,7 +1915,7 @@ def test_target_set_report_groups_retained_member_facts() -> None: reservation_bytes=1024, reservation_capability_id="cap-0", reservation_scope_digest="scope-0", - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", staged_value=True, group_realization_transaction_id="txn-1", @@ -1915,7 +1938,7 @@ def test_target_set_report_groups_retained_member_facts() -> None: reservation_bytes=2048, reservation_capability_id="cap-1", reservation_scope_digest="scope-1", - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", staged_value=True, group_realization_transaction_id="txn-1", @@ -1950,7 +1973,7 @@ def test_target_set_report_groups_retained_member_facts() -> None: runtime="vllm", group_id="group-1", topology=target.topology, - readiness="serving_local_ready", + readiness="runtime_local_ready", partial=False, member_failures=(), ) @@ -2040,6 +2063,7 @@ def test_reports_share_core_realization_fields_across_targets() -> None: binding_target = RealizationTargetPlan( kind="binding_owned", device="cuda:0", + target_layout_digest="binding-layout:bl1:test", 
binding_layout_id="bl1:test", ) binding_envelope = envelope_for_binding(binding, target_kind="binding_owned") @@ -2208,7 +2232,7 @@ def test_reports_share_core_realization_fields_across_targets() -> None: assert retained_profile["retained_binding_capability_expires_at_ms"] == ( 4_102_444_800_000, ) - assert retained_profile["retained_binding_readiness"] == ("serving_local_ready",) + assert retained_profile["retained_binding_readiness"] == ("runtime_local_ready",) assert retained_profile["retained_binding_verification_states"] == ("local_only",) @@ -2258,7 +2282,7 @@ def test_target_set_strategy_and_lifecycle_plans_capture_group_barriers() -> Non group_id="group-1", ), model_config_digest="model-config", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", source=source, resolved_layout=SimpleNamespace( target_layout_hash="target-layout-0", @@ -2276,7 +2300,7 @@ def test_target_set_strategy_and_lifecycle_plans_capture_group_barriers() -> Non group_id="group-1", ), model_config_digest="model-config", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", source=source, resolved_layout=SimpleNamespace( target_layout_hash="target-layout-1", @@ -2333,7 +2357,7 @@ def test_target_set_report_marks_serving_artifact_set_as_per_part() -> None: runtime="vllm", group_id="group-1", source=SimpleNamespace( - source_kind="serving_artifact_set", + source_kind="runtime_artifact_set", artifact_selection_digest="artifact-set-selection", source_artifact_ref=None, members=( @@ -2366,7 +2390,7 @@ def test_target_set_report_marks_serving_artifact_set_as_per_part() -> None: source_selection_digest="fallback-selection", ) - assert report.source_kind == "serving_artifact_set" + assert report.source_kind == "runtime_artifact_set" assert report.source_selection_mode == "per_part_selection" assert [member.source_artifact_ref for member in report.members] == [ "mi2:serving-member-0", @@ -2466,6 +2490,154 @@ def 
test_risk_labels_are_derived_from_target_plan_and_envelope() -> None: ) +_RISK_CLOSURE_MATRIX: tuple[dict[str, str], ...] = ( + { + "risk": "Selection resolver becomes too broad.", + "admission_field": "artifact_id/key exclusivity, view_id, generation_hint", + "envelope_field": "target_layout_digest remains target-plan owned", + "report_field": "source_selection_digest", + "guardrail_test": "test_resolve_artifact_selection_keeps_target_plan_identity_separate", + "blocking_condition": "target layout or copy-plan policy moves into selection", + }, + { + "risk": "SDK direct Global Store access survives behind helper APIs.", + "admission_field": "authority_scope", + "envelope_field": "owner_kind", + "report_field": "authority_scope", + "guardrail_test": "test_sdk_api_paths_do_not_open_global_store_channels", + "blocking_condition": "SDK artifact realization opens Global Store channels", + }, + { + "risk": "`PublicDiskSourceHandle` becomes a permanent source authority.", + "admission_field": "artifact_profile=mounted_source", + "envelope_field": "backing_kind=mounted_source_metadata", + "report_field": "mounted_source.source_artifact_id", + "guardrail_test": "test_mounted_source_realize_rejects_non_msa1_subject", + "blocking_condition": "mounted source executes without msa1 identity", + }, + { + "risk": "Mapped target layout is confused with source selection.", + "admission_field": "target_layout_digest", + "envelope_field": "projection_kind", + "report_field": "copy_plan_digest", + "guardrail_test": "test_resolve_artifact_selection_accepts_mapped_source_view_hint", + "blocking_condition": "mapped/adopted target reports reuse selection digest as layout", + }, + { + "risk": "TensorDict accidentally inherits binding lifecycle.", + "admission_field": "target_kind=tensor_dict", + "envelope_field": "projection_kind=tensor_dict", + "report_field": "publishability.reason", + "guardrail_test": "test_tensor_dict_handle_rejects_binding_lifecycle_capabilities", + 
"blocking_condition": "TensorDict handle can publish, promote, or retain", + }, + { + "risk": "TensorDict projections release daemon payloads too early or leak them.", + "admission_field": "release_strictness", + "envelope_field": "release_policy", + "report_field": "envelope.release_policy", + "guardrail_test": "test_tensor_subset_materialization_and_release", + "blocking_condition": "projection close does not unload daemon payload exactly once", + }, + { + "risk": "Resource lifecycle remains path-specific under a unified API.", + "admission_field": "release_strictness", + "envelope_field": "release_policy", + "report_field": "lifecycle_plan.capability", + "guardrail_test": "test_release_contract_lifecycle_matrix_runs_policy_actions_once", + "blocking_condition": "cleanup action exists outside a release contract", + }, + { + "risk": "Handle-lease mint failure silently weakens export lifetime.", + "admission_field": "export_lifetime_kind", + "envelope_field": "export_kind", + "report_field": "envelope.export_lifetime_kind", + "guardrail_test": "test_cpu_memfd_materialization_fails_before_tensor_restore_without_export_authority", + "blocking_condition": "CPU memfd or CUDA IPC export succeeds without token authority", + }, + { + "risk": "CPU TensorDict mutability stays ambiguous.", + "admission_field": "mutability_contract", + "envelope_field": "mutability_contract", + "report_field": "envelope.mutability_contract", + "guardrail_test": "test_tensor_dict_projection_rejects_mapping_mutations", + "blocking_condition": "TensorDict mapping mutation succeeds", + }, + { + "risk": "`get_into` hides expensive fallback copies.", + "admission_field": "fallback_policy", + "envelope_field": "fallback_reason_buckets", + "report_field": "copy_bytes", + "guardrail_test": "test_get_into_returns_fallback_result_and_unloads", + "blocking_condition": "temporary-payload fallback has no report bucket", + }, + { + "risk": "Prefetch grows a second continuation model.", + "admission_field": 
"operation_id", + "envelope_field": "projection_kind=prefetch_handoff", + "report_field": "operation_backend", + "guardrail_test": "test_realize_async_retained_replica_operation_status_wait_and_cancel", + "blocking_condition": "prefetch bypasses Operation status/wait/cancel", + }, + { + "risk": "Binding paths bypass strategy planning.", + "admission_field": "fallback_policy", + "envelope_field": "direct_write_bytes", + "report_field": "strategy_plan.fallback_policy", + "guardrail_test": "test_binding_envelope_and_report_capture_identity_diagnostics", + "blocking_condition": "binding materialization report lacks strategy facts", + }, + { + "risk": "Tensor-aware strategy loses lane/residual visibility.", + "admission_field": "execution_plan_kind", + "envelope_field": "temporary_replica_bytes", + "report_field": "execution_commit.lane_allocation_bytes", + "guardrail_test": "test_binding_envelope_and_report_capture_identity_diagnostics", + "blocking_condition": "mixed execution omits lane, residual, or reject buckets", + }, + { + "risk": "TP grows special-case orchestration.", + "admission_field": "target_set.source_selection_mode", + "envelope_field": "projection_kind=target_set", + "report_field": "target_set.members", + "guardrail_test": "test_group_member_same_and_per_part_selection_identity", + "blocking_condition": "TP path adds non-target-set realization state", + }, + { + "risk": "RPC cleanup is attempted too early.", + "admission_field": "controller plan validation", + "envelope_field": "resource_authorities", + "report_field": "controller plan spans", + "guardrail_test": "daemon controller realization plan tests", + "blocking_condition": "proto cleanup lands before shared controller path", + }, + { + "risk": "Target-state behavior regresses while compatibility code is deleted.", + "admission_field": "scenario acceptance coverage", + "envelope_field": "runtime_attachment release_policy", + "report_field": "model_runtime.runtime_attachment_target_kind", + 
"guardrail_test": "serving integration/runtime publication scenarios", + "blocking_condition": "compatibility code deletion lacks runtime scenario coverage", + }, +) + + +def test_risk_closure_matrix_has_unique_risks_and_enforcement_fields() -> None: + required_fields = ( + "admission_field", + "envelope_field", + "report_field", + "guardrail_test", + "blocking_condition", + ) + matrix_by_risk = {entry["risk"]: entry for entry in _RISK_CLOSURE_MATRIX} + + assert len(matrix_by_risk) == len(_RISK_CLOSURE_MATRIX) + for risk, entry in matrix_by_risk.items(): + for field in required_fields: + assert entry[field], f"{risk} missing {field}" + + def test_sdk_realization_paths_do_not_import_selection_builder() -> None: checked = ( Path("tensorcast/api/store/__init__.py"), @@ -2473,6 +2645,7 @@ def test_sdk_realization_paths_do_not_import_selection_builder() -> None: Path("tensorcast/api/store/artifact.py"), Path("tensorcast/api/store/materialization.py"), Path("tensorcast/api/store/inplace_slot.py"), + Path("tensorcast/api/store/runtime_realization_reference_consumer.py"), Path("tensorcast/api/plan/plan.py"), ) offenders: list[str] = [] @@ -2583,7 +2756,9 @@ def test_client_binding_rollbacks_log_cleanup_failures_instead_of_suppressing() assert "logger.exception" in helper_source -def test_realization_lifecycle_code_does_not_silently_suppress_broad_exceptions() -> None: +def test_realization_lifecycle_code_does_not_silently_suppress_broad_exceptions() -> ( + None +): guarded_paths = ( Path("tensorcast/api/_register.py"), Path("tensorcast/api/store/__init__.py"), @@ -2601,10 +2776,10 @@ def test_realization_lifecycle_code_does_not_silently_suppress_broad_exceptions( Path("tensorcast/global_store/rpc/replica_registration_rpc_handler.py"), Path("tensorcast/global_store/rpc/transport_rpc_handler.py"), Path("tensorcast/global_store/services/instance_service.py"), - Path("tensorcast/serving/retained_binding.py"), - Path("tensorcast/serving/_runtime_impl/lifecycle.py"), - 
Path("tensorcast/serving/local_ready.py"), - Path("tensorcast/serving/recipe_build.py"), + Path("tensorcast/artifact_runtime/binding/retained.py"), + Path("tensorcast/artifact_runtime/lifecycle.py"), + Path("tensorcast/artifact_runtime/recipe/local_ready.py"), + Path("tensorcast/artifact_runtime/recipe/build.py"), ) offenders = [ str(path) @@ -2674,7 +2849,9 @@ def test_mounted_source_config_no_longer_exposes_absolute_fallback_mode() -> Non assert offenders == [] -def test_daemon_canonical_index_loading_uses_explicit_authority_not_disk_fallback() -> None: +def test_daemon_canonical_index_loading_uses_explicit_authority_not_disk_fallback() -> ( + None +): guarded_paths = ( Path("daemon/service/controllers/materialization_index_source_utils.h"), Path("daemon/service/controllers/materialization_index_source_utils.cc"), diff --git a/tests/python/api/test_register_stable_dram_streaming.py b/tests/python/api/test_register_stable_dram_streaming.py index c310b977..a9ec88f8 100644 --- a/tests/python/api/test_register_stable_dram_streaming.py +++ b/tests/python/api/test_register_stable_dram_streaming.py @@ -8,8 +8,8 @@ from dataclasses import dataclass from typing import Any -import torch import pytest +import torch from tensorcast.api._config import PlanType, RegisterArtifactOptions from tensorcast.api._errors import TensorCastError diff --git a/tests/python/api/test_retrieval_options.py b/tests/python/api/test_retrieval_options.py index f7024f73..e8b8ee48 100644 --- a/tests/python/api/test_retrieval_options.py +++ b/tests/python/api/test_retrieval_options.py @@ -50,6 +50,27 @@ def test_get_options_parse_topology() -> None: ) +def test_execution_topology_keeps_unspecified_collective_policy() -> None: + opts = GetArtifactOptions( + execution_topology=ExecutionTopologyContext( + collective_group=CollectiveLoadGroup( + group_id="group-a", + world_size=4, + rank=2, + ) + ) + ) + + assert opts.execution_topology is not None + assert opts.execution_topology.collective_group is 
not None + assert opts.execution_topology.collective_policy is None + + +def test_collective_policy_parse_rejects_unspecified_value() -> None: + with pytest.raises(ValueError, match="must be explicit"): + CollectivePolicyMode.parse(None) + + def test_store_options_accept_execution_scoped_defaults() -> None: opts = StoreOptions(get=GetArtifactOptions(source=RetrievalPreset.DISK_ONLY)) assert opts.get is not None diff --git a/tests/python/api/test_serving_binding_reference_consumer.py b/tests/python/api/test_runtime_realization_reference_consumer.py similarity index 88% rename from tests/python/api/test_serving_binding_reference_consumer.py rename to tests/python/api/test_runtime_realization_reference_consumer.py index 91ccc645..4ea0c01b 100644 --- a/tests/python/api/test_serving_binding_reference_consumer.py +++ b/tests/python/api/test_runtime_realization_reference_consumer.py @@ -2,13 +2,16 @@ from __future__ import annotations +import importlib.util + from google.protobuf.any_pb2 import Any +import tensorcast.api.store as store_api from tensorcast.api.context import GroupRealization -from tensorcast.api.store.serving_binding_reference_consumer import ( +from tensorcast.api.store.runtime_realization_reference_consumer import ( REFERENCE_RUNTIME, - ReferenceServingAcquireResult, - ReferenceServingTensorSpec, + ReferenceRuntimeAcquireResult, + ReferenceRuntimeTensorSpec, acquire_reference_binding, build_reference_resolved_spec, prefetch_reference_binding, @@ -23,12 +26,28 @@ BindingValueRef, BindingValueVerificationState, GroupRealizationAcquireRef, - PrefetchedServingBinding, + PrefetchHandoff, ) +def test_legacy_reference_consumer_surface_is_removed() -> None: + spec = importlib.util.find_spec( + "tensorcast.api.store.serving_binding_reference_consumer" + ) + assert spec is None + for removed_name in ( + "ReferenceServingAcquireResult", + "ReferenceServingResolvedSpec", + "ReferenceServingTensorSpec", + "unpack_prefetched_serving_binding", + 
"unpack_prefetched_serving_binding_set", + ): + assert removed_name not in store_api.__all__ + assert not hasattr(store_api, removed_name) + + class _FakeDaemonClient: - def __init__(self, prefetched: PrefetchedServingBinding) -> None: + def __init__(self, prefetched: PrefetchHandoff) -> None: self.prefetched = prefetched self.prefetch_calls: list[dict[str, object]] = [] self.acquire_calls: list[dict[str, object]] = [] @@ -68,7 +87,7 @@ def release_placement_lease(self, **kwargs: object): return store_daemon_pb2.ReleasePlacementLeaseResponse() -def _prefetched() -> PrefetchedServingBinding: +def _prefetched() -> PrefetchHandoff: member = build_reference_resolved_spec( source_artifact_id="mi2:source", artifact_selection_digest="selection", @@ -90,7 +109,7 @@ def _prefetched() -> PrefetchedServingBinding: reservation_bytes=4, scope_digest="scope", ) - return PrefetchedServingBinding( + return PrefetchHandoff( local_serving_ref="binding-local:binding-1:value-1", binding_value_ref=ref, daemon_id="daemon-1", @@ -99,12 +118,12 @@ def _prefetched() -> PrefetchedServingBinding: member=member, reservation_bytes=4, reservation_capability=capability, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state=BindingValueVerificationState.LOCAL_ONLY, ) -def _staged_prefetched() -> PrefetchedServingBinding: +def _staged_prefetched() -> PrefetchHandoff: base = _prefetched() ref = BindingValueRef( binding_id=base.binding_value_ref.binding_id, @@ -139,7 +158,7 @@ def test_reference_consumer_writes_cache_and_rebuilds_target(tmp_path) -> None: source_artifact_id="mi2:source", artifact_selection_digest="selection", device_uuid="gpu-0", - tensor=ReferenceServingTensorSpec(name="weight", size_bytes=16, shape=(4,)), + tensor=ReferenceRuntimeTensorSpec(name="weight", size_bytes=16, shape=(4,)), ) record = write_reference_resolved_spec_cache_entry( @@ -182,7 +201,7 @@ def test_reference_consumer_prefetch_acquire_and_release_lifecycle(tmp_path) -> ) 
release_reference_acquire(fake_client, acquire_result=acquired) - assert isinstance(acquired, ReferenceServingAcquireResult) + assert isinstance(acquired, ReferenceRuntimeAcquireResult) assert acquired.has_cuda_ipc_handle is True assert acquired.lease_token == b"lease-token" assert fake_client.released_tokens == [b"lease-token"] diff --git a/tests/python/api/test_serving_binding_spec_cache.py b/tests/python/api/test_runtime_realization_spec_cache.py similarity index 88% rename from tests/python/api/test_serving_binding_spec_cache.py rename to tests/python/api/test_runtime_realization_spec_cache.py index cd57508d..606151d0 100644 --- a/tests/python/api/test_serving_binding_spec_cache.py +++ b/tests/python/api/test_runtime_realization_spec_cache.py @@ -3,44 +3,56 @@ from __future__ import annotations import hashlib +import importlib.util import json import threading import pytest -from tensorcast.api.store.serving_binding_spec_cache import ( - ServingBindingSpecCacheGroupIndex, +from tensorcast.api.store.runtime_realization_spec_cache import ( + RuntimeRealizationSpecCacheGroupIndex, read_matching_resolved_spec_cache_entry, read_resolved_spec_cache_entry, read_resolved_spec_cache_group_index, - serving_binding_spec_cache_root, + runtime_realization_spec_cache_root, write_resolved_spec_cache_entry, write_resolved_spec_cache_group_index, ) from tensorcast.types import ( BlobRef, - ServingBindingMemberRef, - ServingBindingResolvedSpecCacheEntry, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeRealizationSpecCacheEntry, + RuntimeTopologyRef, ) +def _find_spec_or_none(module_name: str): + try: + return importlib.util.find_spec(module_name) + except ModuleNotFoundError: + return None + + +def test_old_serving_binding_spec_cache_module_is_removed() -> None: + assert _find_spec_or_none("tensorcast.api.store.serving_binding_spec_cache") is 
None + + def _entry( *, blob: bytes = b"layout-bytes", member_index: int = 0, member_count: int = 1, -) -> ServingBindingResolvedSpecCacheEntry: - topology = ServingTopologyRef(schema_topology_digest="topology-schema") - member = ServingBindingMemberRef( +) -> RuntimeRealizationSpecCacheEntry: + topology = RuntimeTopologyRef(schema_topology_digest="topology-schema") + member = RuntimeBindingMemberRef( member_id=f"member-{member_index}", member_index=member_index, member_count=member_count, group_id="group-1", ) - source = ServingBindingSourceRef( + source = RuntimeBindingSourceRef( source_kind="checkpoint_artifact", artifact_selection_digest="selection-digest", source_artifact_ref="mi2:checkpoint", @@ -51,21 +63,21 @@ def _entry( sha256=hashlib.sha256(blob).hexdigest(), size_bytes=len(blob), ) - draft = ServingBindingResolvedSpecCacheEntry( + draft = RuntimeRealizationSpecCacheEntry( schema_version=1, cache_key_digest="placeholder", spec_digest="placeholder", runtime="vllm", source=source, - source_reuse=ServingBindingSourceReuseDecision( - mode="checkpoint_to_serving", + source_reuse=RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", representation_contract_hash="repr-contract", ), topology=topology, member=member, source_schema_hash="source-schema", model_config_digest="model-config", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", binding_layout_id="layout-1", target_layout_hash="target-layout-hash", tensor_schema_hash="tensor-schema", @@ -80,14 +92,14 @@ def _entry( def _with_recomputed_spec_digest( - entry: ServingBindingResolvedSpecCacheEntry, -) -> ServingBindingResolvedSpecCacheEntry: + entry: RuntimeRealizationSpecCacheEntry, +) -> RuntimeRealizationSpecCacheEntry: return entry.model_copy(update={"spec_digest": entry.computed_spec_digest()}) def _with_recomputed_digests( - entry: ServingBindingResolvedSpecCacheEntry, -) -> ServingBindingResolvedSpecCacheEntry: + entry: RuntimeRealizationSpecCacheEntry, +) -> 
RuntimeRealizationSpecCacheEntry: with_cache_key = entry.model_copy( update={"cache_key_digest": entry.computed_cache_key_digest()} ) @@ -121,7 +133,7 @@ def test_first_cold_start_cache_write_publishes_readable_entry(tmp_path) -> None record = read_resolved_spec_cache_entry(tmp_path, entry.cache_key_digest) spec_dir = ( - serving_binding_spec_cache_root(tmp_path) + runtime_realization_spec_cache_root(tmp_path) / "specs" / "sha256" / entry.spec_digest @@ -229,7 +241,7 @@ def test_resolved_spec_cache_rejects_unsupported_key_schema_version(tmp_path) -> blobs={"target_layout": blob}, ) key_path = ( - serving_binding_spec_cache_root(tmp_path) + runtime_realization_spec_cache_root(tmp_path) / "keys" / "sha256" / f"{entry.cache_key_digest}.json" @@ -253,7 +265,7 @@ def test_resolved_spec_cache_rejects_unsupported_manifest_schema_version( blobs={"target_layout": blob}, ) manifest_path = ( - serving_binding_spec_cache_root(tmp_path) + runtime_realization_spec_cache_root(tmp_path) / "specs" / "sha256" / entry.spec_digest @@ -278,7 +290,7 @@ def test_resolved_spec_cache_rejects_unsupported_manifest_producer_version( blobs={"target_layout": blob}, ) manifest_path = ( - serving_binding_spec_cache_root(tmp_path) + runtime_realization_spec_cache_root(tmp_path) / "specs" / "sha256" / entry.spec_digest @@ -318,7 +330,7 @@ def test_resolved_spec_cache_cleans_tmp_after_publish(tmp_path) -> None: blobs={"target_layout": blob}, ) - tmp_dir = serving_binding_spec_cache_root(tmp_path) / "tmp" + tmp_dir = runtime_realization_spec_cache_root(tmp_path) / "tmp" assert tmp_dir.exists() assert list(tmp_dir.iterdir()) == [] @@ -384,7 +396,7 @@ def test_resolved_spec_cache_group_index_roundtrip(tmp_path) -> None: entry=entry_1, blobs={"target_layout": blob_1}, ) - draft = ServingBindingSpecCacheGroupIndex( + draft = RuntimeRealizationSpecCacheGroupIndex( group_cache_key_digest="placeholder", runtime="vllm", topology=entry_0.topology, @@ -414,7 +426,7 @@ def 
test_resolved_spec_cache_group_index_rejects_empty_runtime(tmp_path) -> None entry=entry, blobs={"target_layout": b"layout-bytes"}, ) - draft = ServingBindingSpecCacheGroupIndex( + draft = RuntimeRealizationSpecCacheGroupIndex( group_cache_key_digest="placeholder", runtime="", topology=entry.topology, @@ -437,7 +449,7 @@ def test_resolved_spec_cache_group_index_rejects_member_mismatch(tmp_path) -> No entry=entry, blobs={"target_layout": blob}, ) - draft = ServingBindingSpecCacheGroupIndex( + draft = RuntimeRealizationSpecCacheGroupIndex( group_cache_key_digest="placeholder", runtime="vllm", topology=entry.topology, @@ -460,7 +472,7 @@ def test_resolved_spec_cache_group_lookup_validates_member_cache(tmp_path) -> No entry=entry, blobs={"target_layout": blob}, ) - draft = ServingBindingSpecCacheGroupIndex( + draft = RuntimeRealizationSpecCacheGroupIndex( group_cache_key_digest="placeholder", runtime="vllm", topology=entry.topology, @@ -472,7 +484,7 @@ def test_resolved_spec_cache_group_lookup_validates_member_cache(tmp_path) -> No ) write_resolved_spec_cache_group_index(tmp_path, index=index) key_path = ( - serving_binding_spec_cache_root(tmp_path) + runtime_realization_spec_cache_root(tmp_path) / "keys" / "sha256" / f"{entry.cache_key_digest}.json" diff --git a/tests/python/api/test_prefetch_serving_binding_target.py b/tests/python/api/test_runtime_realization_target.py similarity index 72% rename from tests/python/api/test_prefetch_serving_binding_target.py rename to tests/python/api/test_runtime_realization_target.py index e685a9d9..5020bc2d 100644 --- a/tests/python/api/test_prefetch_serving_binding_target.py +++ b/tests/python/api/test_runtime_realization_target.py @@ -12,31 +12,31 @@ BindingValueRef, BindingValueVerificationState, GroupRealizationAcquireRef, - PrefetchedServingBinding, - PrefetchedServingBindingMemberFailure, - PrefetchedServingBindingSet, + PrefetchHandoff, + PrefetchHandoffMemberFailure, + PrefetchHandoffSet, PrefetchRetentionPolicy, - 
ServingBindingMemberRef, - ServingBindingResolvedLayout, - ServingBindingSetTarget, - ServingBindingSourceMemberRef, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingTarget, - ServingTopologyRef, - plan_serving_binding_source_reuse, + RealizationTarget, + RealizationTargetSet, + RuntimeBindingMemberRef, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceMemberRef, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeTopologyRef, + plan_runtime_binding_source_reuse, ) -def _topology() -> ServingTopologyRef: - return ServingTopologyRef( +def _topology() -> RuntimeTopologyRef: + return RuntimeTopologyRef( schema_topology_digest="topology-schema", admission_topology_digest="topology-admission", ) -def _member() -> ServingBindingMemberRef: - return ServingBindingMemberRef( +def _member() -> RuntimeBindingMemberRef: + return RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=1, @@ -44,8 +44,8 @@ def _member() -> ServingBindingMemberRef: ) -def _checkpoint_source() -> ServingBindingSourceRef: - return ServingBindingSourceRef( +def _checkpoint_source() -> RuntimeBindingSourceRef: + return RuntimeBindingSourceRef( source_kind="checkpoint_artifact", artifact_selection_digest="selection-digest", source_artifact_ref="mi2:checkpoint", @@ -55,17 +55,17 @@ def _checkpoint_source() -> ServingBindingSourceRef: def _resolved_layout( *, - source: ServingBindingSourceRef | None = None, - source_reuse: ServingBindingSourceReuseDecision | None = None, - topology: ServingTopologyRef | None = None, - member: ServingBindingMemberRef | None = None, -) -> ServingBindingResolvedLayout: - return ServingBindingResolvedLayout( + source: RuntimeBindingSourceRef | None = None, + source_reuse: RuntimeBindingSourceReuseDecision | None = None, + topology: RuntimeTopologyRef | None = None, + member: RuntimeBindingMemberRef | None = None, +) -> RuntimeBindingResolvedLayout: + return RuntimeBindingResolvedLayout( 
binding_layout_id="layout-1", source=source or _checkpoint_source(), source_reuse=source_reuse - or ServingBindingSourceReuseDecision( - mode="checkpoint_to_serving", + or RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", representation_contract_hash="repr-contract", ), topology=topology or _topology(), @@ -83,16 +83,16 @@ def _resolved_layout( def _target( *, - source: ServingBindingSourceRef | None = None, - source_reuse: ServingBindingSourceReuseDecision | None = None, - topology: ServingTopologyRef | None = None, - member: ServingBindingMemberRef | None = None, + source: RuntimeBindingSourceRef | None = None, + source_reuse: RuntimeBindingSourceReuseDecision | None = None, + topology: RuntimeTopologyRef | None = None, + member: RuntimeBindingMemberRef | None = None, device: str = "cuda:0", device_uuid: str = "GPU-0", target_layout: bytes = b"target-layout", target_index_bytes: bytes = b"target-index", target_layout_hash: str = "target-layout-hash", -) -> ServingBindingTarget: +) -> RealizationTarget: resolved_source = source or _checkpoint_source() resolved_topology = topology or _topology() resolved_member = member or _member() @@ -108,7 +108,7 @@ def _target( "target_layout_hash": target_layout_hash, } ) - return ServingBindingTarget( + return RealizationTarget( runtime="vllm", device=device, device_uuid=device_uuid, @@ -116,7 +116,7 @@ def _target( topology=resolved_topology, member=resolved_member, model_config_digest="model-config", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", resolved_layout=resolved_layout, ) @@ -131,19 +131,19 @@ def test_prefetch_signature_keeps_device_and_adds_target() -> None: assert "retention" in params -def test_serving_binding_target_proto_roundtrip_includes_source() -> None: +def test_runtime_target_proto_roundtrip_includes_source() -> None: target = _target() - roundtripped = ServingBindingTarget.from_proto(target.to_proto()) + roundtripped = 
RealizationTarget.from_proto(target.to_proto()) assert roundtripped == target assert roundtripped.source.source_kind == "checkpoint_artifact" - assert roundtripped.resolved_layout.source_reuse.mode == "checkpoint_to_serving" + assert roundtripped.resolved_layout.source_reuse.mode == "checkpoint_to_runtime" -def test_serving_binding_set_requires_shared_source() -> None: +def test_runtime_target_set_requires_shared_source() -> None: target = _target() - serving_set = ServingBindingSetTarget( + runtime_set = RealizationTargetSet( runtime="vllm", source=target.source, topology=target.topology, @@ -151,19 +151,19 @@ def test_serving_binding_set_requires_shared_source() -> None: members=(target,), ) - assert ServingBindingSetTarget.from_proto(serving_set.to_proto()) == serving_set + assert RealizationTargetSet.from_proto(runtime_set.to_proto()) == runtime_set -def test_serving_binding_set_allows_distinct_member_device_and_layout_specs() -> None: +def test_runtime_target_set_allows_distinct_member_device_and_layout_specs() -> None: topology = _topology() source = _checkpoint_source() - member_0 = ServingBindingMemberRef( + member_0 = RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=2, group_id="group-1", ) - member_1 = ServingBindingMemberRef( + member_1 = RuntimeBindingMemberRef( member_id="member-1", member_index=1, member_count=2, @@ -190,16 +190,16 @@ def test_serving_binding_set_allows_distinct_member_device_and_layout_specs() -> target_layout_hash="target-layout-hash-member-1", ) - serving_set = ServingBindingSetTarget( + runtime_set = RealizationTargetSet( runtime="vllm", source=source, topology=topology, group_id="group-1", members=(target_0, target_1), ) - roundtripped = ServingBindingSetTarget.from_proto(serving_set.to_proto()) + roundtripped = RealizationTargetSet.from_proto(runtime_set.to_proto()) - assert roundtripped == serving_set + assert roundtripped == runtime_set assert {member.device_uuid for member in roundtripped.members} 
== { "GPU-0", "GPU-1", @@ -210,21 +210,21 @@ def test_serving_binding_set_allows_distinct_member_device_and_layout_specs() -> assert all(member.topology == topology for member in roundtripped.members) -def test_direct_serving_member_copy_requires_matching_member_schema_and_layout() -> ( +def test_direct_runtime_member_copy_requires_matching_member_schema_and_layout() -> ( None ): topology = _topology() member = _member() - source = ServingBindingSourceRef( - source_kind="serving_artifact_set", + source = RuntimeBindingSourceRef( + source_kind="runtime_artifact_set", artifact_selection_digest="selection-digest", source_schema_hash="source-schema", representation_contract_hash="repr-contract", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", tensor_schema_hash="tensor-schema", topology=topology, members=( - ServingBindingSourceMemberRef( + RuntimeBindingSourceMemberRef( member=member, artifact_ref="mi2:serving-member", tensor_schema_hash="tensor-schema", @@ -232,29 +232,29 @@ def test_direct_serving_member_copy_requires_matching_member_schema_and_layout() ), ), ) - reuse = ServingBindingSourceReuseDecision( - mode="serving_direct_member_copy", + reuse = RuntimeBindingSourceReuseDecision( + mode="runtime_direct_member_copy", representation_contract_hash="repr-contract", ) target = _target(source=source, source_reuse=reuse) - assert target.resolved_layout.source_reuse.mode == "serving_direct_member_copy" + assert target.resolved_layout.source_reuse.mode == "runtime_direct_member_copy" -def test_direct_serving_member_copy_planner_admits_only_matching_source() -> None: +def test_direct_runtime_member_copy_planner_admits_only_matching_source() -> None: topology = _topology() member = _member() - source = ServingBindingSourceRef( - source_kind="serving_artifact_set", + source = RuntimeBindingSourceRef( + source_kind="runtime_artifact_set", artifact_selection_digest="selection-digest", source_schema_hash="source-schema", 
representation_contract_hash="repr-contract", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", tensor_schema_hash="tensor-schema", topology=topology, members=( - ServingBindingSourceMemberRef( + RuntimeBindingSourceMemberRef( member=member, artifact_ref="mi2:serving-member", tensor_schema_hash="tensor-schema", @@ -263,7 +263,7 @@ def test_direct_serving_member_copy_planner_admits_only_matching_source() -> Non ), ) - decision = plan_serving_binding_source_reuse( + decision = plan_runtime_binding_source_reuse( source=source, topology=topology, member=member, @@ -272,22 +272,22 @@ def test_direct_serving_member_copy_planner_admits_only_matching_source() -> Non representation_contract_hash="repr-contract", ) - assert decision.mode == "serving_direct_member_copy" + assert decision.mode == "runtime_direct_member_copy" -def test_direct_serving_member_copy_planner_returns_transform_for_topology_mismatch() -> ( +def test_direct_runtime_member_copy_planner_returns_transform_for_topology_mismatch() -> ( None ): - source = ServingBindingSourceRef( - source_kind="serving_artifact_set", + source = RuntimeBindingSourceRef( + source_kind="runtime_artifact_set", artifact_selection_digest="selection-digest", source_schema_hash="source-schema", representation_contract_hash="repr-contract", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", tensor_schema_hash="tensor-schema", - topology=ServingTopologyRef(schema_topology_digest="different-topology"), + topology=RuntimeTopologyRef(schema_topology_digest="different-topology"), members=( - ServingBindingSourceMemberRef( + RuntimeBindingSourceMemberRef( member=_member(), artifact_ref="mi2:serving-member", tensor_schema_hash="tensor-schema", @@ -295,7 +295,7 @@ def test_direct_serving_member_copy_planner_returns_transform_for_topology_misma ), ), ) - decision = plan_serving_binding_source_reuse( + decision = plan_runtime_binding_source_reuse( source=source, topology=_topology(), 
member=_member(), @@ -304,20 +304,20 @@ def test_direct_serving_member_copy_planner_returns_transform_for_topology_misma representation_contract_hash="repr-contract", ) - assert decision.mode == "serving_transform_required" + assert decision.mode == "runtime_transform_required" assert "topology" in (decision.reason or "") def test_transform_required_decision_is_serializable_but_not_direct_copy() -> None: - decision = ServingBindingSourceReuseDecision( - mode="serving_transform_required", + decision = RuntimeBindingSourceReuseDecision( + mode="runtime_transform_required", work_plan_hash="work-plan", ) - assert ServingBindingSourceReuseDecision.from_proto(decision.to_proto()) == decision + assert RuntimeBindingSourceReuseDecision.from_proto(decision.to_proto()) == decision -def test_prefetched_serving_binding_result_proto_roundtrip() -> None: +def test_prefetch_handoff_result_proto_roundtrip() -> None: member = _member() binding_ref = BindingValueRef( binding_id="binding-1", @@ -336,7 +336,7 @@ def test_prefetched_serving_binding_result_proto_roundtrip() -> None: scope_digest="scope", expires_at_ms=1234, ) - result = PrefetchedServingBinding( + result = PrefetchHandoff( local_serving_ref="binding-local:binding-1:value-1", binding_value_ref=binding_ref, daemon_id="daemon-1", @@ -345,16 +345,16 @@ def test_prefetched_serving_binding_result_proto_roundtrip() -> None: member=member, reservation_bytes=1024, reservation_capability=capability, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state=BindingValueVerificationState.LOCAL_ONLY, serving_artifact_id=None, expires_at_ms=1234, ) - assert PrefetchedServingBinding.from_proto(result.to_proto()) == result + assert PrefetchHandoff.from_proto(result.to_proto()) == result -def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None: +def test_prefetch_handoff_staged_result_proto_roundtrip() -> None: member = _member() binding_ref = BindingValueRef( binding_id="binding-1", @@ 
-372,7 +372,7 @@ def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None: reservation_bytes=1024, scope_digest="scope", ) - result = PrefetchedServingBinding( + result = PrefetchHandoff( local_serving_ref="binding-local:binding-1:staged-value-1", binding_value_ref=binding_ref, daemon_id="daemon-1", @@ -381,7 +381,7 @@ def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None: member=member, reservation_bytes=1024, reservation_capability=capability, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state=BindingValueVerificationState.LOCAL_ONLY, staged_value=True, group_realization_acquire=GroupRealizationAcquireRef( @@ -394,10 +394,10 @@ def test_prefetched_serving_binding_staged_result_proto_roundtrip() -> None: ), ) - assert PrefetchedServingBinding.from_proto(result.to_proto()) == result + assert PrefetchHandoff.from_proto(result.to_proto()) == result -def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None: +def test_prefetch_handoff_set_partial_diagnostics_roundtrip() -> None: member = _member() binding_ref = BindingValueRef( binding_id="binding-1", @@ -415,7 +415,7 @@ def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None: reservation_bytes=1024, scope_digest="scope", ) - success = PrefetchedServingBinding( + success = PrefetchHandoff( local_serving_ref="binding-local:binding-1:value-1", binding_value_ref=binding_ref, daemon_id="daemon-1", @@ -424,16 +424,16 @@ def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None: member=member, reservation_bytes=1024, reservation_capability=capability, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state=BindingValueVerificationState.LOCAL_ONLY, ) - failed_member = ServingBindingMemberRef( + failed_member = RuntimeBindingMemberRef( member_id="member-1", member_index=1, member_count=2, group_id="group-1", ) - failure = 
PrefetchedServingBindingMemberFailure( + failure = PrefetchHandoffMemberFailure( member=failed_member, code="FAILED_PRECONDITION", message="resolved spec mismatch", @@ -441,20 +441,20 @@ def test_prefetched_serving_binding_set_partial_diagnostics_roundtrip() -> None: cache_key_digest="cache-key", spec_digest="spec", ) - result = PrefetchedServingBindingSet( + result = PrefetchHandoffSet( runtime="vllm", topology=_topology(), group_id="group-1", members=(success,), - readiness="serving_local_ready", + readiness="runtime_local_ready", member_failures=(failure,), partial=True, ) - assert PrefetchedServingBindingSet.from_proto(result.to_proto()) == result + assert PrefetchHandoffSet.from_proto(result.to_proto()) == result -def test_prefetched_serving_binding_set_rejects_overlap_between_success_and_failure() -> ( +def test_prefetch_handoff_set_rejects_overlap_between_success_and_failure() -> ( None ): member = _member() @@ -474,7 +474,7 @@ def test_prefetched_serving_binding_set_rejects_overlap_between_success_and_fail reservation_bytes=1024, scope_digest="scope", ) - success = PrefetchedServingBinding( + success = PrefetchHandoff( binding_value_ref=binding_ref, daemon_id="daemon-1", daemon_session_id="session-1", @@ -482,22 +482,22 @@ def test_prefetched_serving_binding_set_rejects_overlap_between_success_and_fail member=member, reservation_bytes=1024, reservation_capability=capability, - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state=BindingValueVerificationState.LOCAL_ONLY, ) - failure = PrefetchedServingBindingMemberFailure( + failure = PrefetchHandoffMemberFailure( member=member, code="FAILED_PRECONDITION", message="same member failed", ) with pytest.raises(ValueError, match="both success and failure"): - PrefetchedServingBindingSet( + PrefetchHandoffSet( runtime="vllm", topology=_topology(), group_id="group-1", members=(success,), - readiness="serving_local_ready", + readiness="runtime_local_ready", 
member_failures=(failure,), partial=True, ) diff --git a/tests/python/test_serving_artifact_manifest.py b/tests/python/artifact_runtime/artifact/test_manifest.py similarity index 58% rename from tests/python/test_serving_artifact_manifest.py rename to tests/python/artifact_runtime/artifact/test_manifest.py index 06145dce..3004a362 100644 --- a/tests/python/test_serving_artifact_manifest.py +++ b/tests/python/artifact_runtime/artifact/test_manifest.py @@ -6,14 +6,14 @@ import torch import tensorcast as tc -from tensorcast.serving.artifact_manifest import ( +from tensorcast.artifact_runtime.artifact.manifest import ( SERVING_MANIFEST_TENSOR_NAME, - cross_check_serving_artifact_manifest, - read_serving_artifact_manifest_tensor, + cross_check_runtime_artifact_manifest, + read_runtime_artifact_manifest_tensor, ) -def _manifest(**overrides) -> tc.ServingArtifactManifest: +def _manifest(**overrides) -> tc.RuntimeArtifactManifest: values = { "framework_name": "vllm", "adapter_version": "adapter-v1", @@ -26,7 +26,22 @@ def _manifest(**overrides) -> tc.ServingArtifactManifest: "build_pipeline_version": "pipeline-v1", } values.update(overrides) - return tc.ServingArtifactManifest(**values) + return tc.RuntimeArtifactManifest(**values) + + +class _ManifestTensorResult: + def __init__( + self, + tensors: dict[str, torch.Tensor], + releases: list[str], + marker: object, + ) -> None: + self.tensors = tensors + self._releases = releases + self._marker = marker + + def release(self) -> None: + self._releases.append(str(self._marker)) def test_serving_artifact_manifest_builds_runtime_policy() -> None: @@ -48,14 +63,14 @@ def test_serving_artifact_manifest_policy_round_trips_topology_digest() -> None: ).to_runtime_policy() assert policy.expected_topology_admission_digest == "topology-digest" - assert tc.ServingRuntimePolicy.from_proto(policy.to_proto()) == policy + assert tc.RuntimeArtifactPolicy.from_proto(policy.to_proto()) == policy -def 
test_cross_check_serving_artifact_manifest_accepts_matching_contract() -> None: +def test_cross_check_runtime_artifact_manifest_accepts_matching_contract() -> None: manifest = _manifest() assert ( - cross_check_serving_artifact_manifest( + cross_check_runtime_artifact_manifest( manifest=manifest, descriptor_tensor_schema_hash="schema-hash", tensor_names=("w",), @@ -65,9 +80,9 @@ def test_cross_check_serving_artifact_manifest_accepts_matching_contract() -> No ) -def test_cross_check_serving_artifact_manifest_rejects_mismatch() -> None: +def test_cross_check_runtime_artifact_manifest_rejects_mismatch() -> None: with pytest.raises(RuntimeError, match="tensor schema hash mismatch"): - cross_check_serving_artifact_manifest( + cross_check_runtime_artifact_manifest( manifest=_manifest(tensor_schema_hash="other"), descriptor_tensor_schema_hash="schema-hash", tensor_names=("w",), @@ -75,62 +90,80 @@ def test_cross_check_serving_artifact_manifest_rejects_mismatch() -> None: ) -def test_read_serving_artifact_manifest_tensor_reads_uint8_payload() -> None: +def test_read_runtime_artifact_manifest_tensor_reads_uint8_payload() -> None: manifest = _manifest() class _Artifact: + def __init__(self) -> None: + self.releases: list[str] = [] + def subset(self, names): assert names == [SERVING_MANIFEST_TENSOR_NAME] return self - def tensor_dict(self, *, device: str): + def tensor_dict_with_diagnostics(self, *, device: str): assert device == "cpu" - return { - SERVING_MANIFEST_TENSOR_NAME: torch.tensor( - list(manifest.to_bytes()), dtype=torch.uint8 - ) - } - + return _ManifestTensorResult( + { + SERVING_MANIFEST_TENSOR_NAME: torch.tensor( + list(manifest.to_bytes()), dtype=torch.uint8 + ) + }, + self.releases, + device, + ) + + artifact = _Artifact() assert ( - read_serving_artifact_manifest_tensor( - _Artifact(), + read_runtime_artifact_manifest_tensor( + artifact, artifact_ref="mi2:serving", ) == manifest ) + assert artifact.releases == ["cpu"] -def 
test_read_serving_artifact_manifest_tensor_falls_back_to_cuda_payload( +def test_read_runtime_artifact_manifest_tensor_falls_back_to_cuda_payload( monkeypatch, ) -> None: manifest = _manifest() monkeypatch.setattr(torch.cuda, "current_device", lambda: 2) class _Artifact: + def __init__(self) -> None: + self.releases: list[str] = [] + def subset(self, names): assert names == [SERVING_MANIFEST_TENSOR_NAME] return self - def tensor_dict(self, *, device: str): + def tensor_dict_with_diagnostics(self, *, device: str): if device == "cpu": raise RuntimeError("CPU replica is not loaded") assert device == torch.device("cuda", 2) - return { - SERVING_MANIFEST_TENSOR_NAME: torch.tensor( - list(manifest.to_bytes()), dtype=torch.uint8 - ) - } - + return _ManifestTensorResult( + { + SERVING_MANIFEST_TENSOR_NAME: torch.tensor( + list(manifest.to_bytes()), dtype=torch.uint8 + ) + }, + self.releases, + device, + ) + + artifact = _Artifact() assert ( - read_serving_artifact_manifest_tensor( - _Artifact(), + read_runtime_artifact_manifest_tensor( + artifact, artifact_ref="mi2:serving", ) == manifest ) + assert artifact.releases == ["cuda:2"] -def test_read_serving_artifact_manifest_tensor_reports_cpu_and_cuda_errors( +def test_read_runtime_artifact_manifest_tensor_reports_cpu_and_cuda_errors( monkeypatch, ) -> None: monkeypatch.setattr(torch.cuda, "current_device", lambda: 3) @@ -140,14 +173,14 @@ def subset(self, names): assert names == [SERVING_MANIFEST_TENSOR_NAME] return self - def tensor_dict(self, *, device: str): + def tensor_dict_with_diagnostics(self, *, device: str): if device == "cpu": raise RuntimeError("CPU replica is not loaded") assert device == torch.device("cuda", 3) raise RuntimeError("CUDA replica is not loaded") with pytest.raises(RuntimeError) as exc_info: - read_serving_artifact_manifest_tensor( + read_runtime_artifact_manifest_tensor( _Artifact(), artifact_ref="mi2:serving", ) @@ -157,30 +190,61 @@ def tensor_dict(self, *, device: str): assert "CUDA replica is 
not loaded" in message -def test_cross_check_serving_artifact_manifest_enforces_runtime_policy() -> None: +def test_read_runtime_artifact_manifest_tensor_releases_invalid_payload() -> None: + class _Artifact: + def __init__(self) -> None: + self.releases: list[str] = [] + + def subset(self, names): + assert names == [SERVING_MANIFEST_TENSOR_NAME] + return self + + def tensor_dict_with_diagnostics(self, *, device: str): + assert device == "cpu" + return _ManifestTensorResult( + { + SERVING_MANIFEST_TENSOR_NAME: torch.tensor( + [1.0], + dtype=torch.float32, + ) + }, + self.releases, + device, + ) + + artifact = _Artifact() + with pytest.raises(RuntimeError, match="1D torch.uint8"): + read_runtime_artifact_manifest_tensor( + artifact, + artifact_ref="mi2:serving", + ) + assert artifact.releases == ["cpu"] + + +def test_cross_check_runtime_artifact_manifest_enforces_runtime_policy() -> None: manifest = _manifest(topology_admission_digest="topology-digest") policy = manifest.to_runtime_policy() assert ( - cross_check_serving_artifact_manifest( + cross_check_runtime_artifact_manifest( manifest=manifest, descriptor_tensor_schema_hash="schema-hash", tensor_names=("w",), expected_tensor_schema_hash="schema-hash", - serving_runtime_policy=policy, + runtime_artifact_policy=policy, ) == manifest ) with pytest.raises(RuntimeError, match="manifest ref mismatch"): - cross_check_serving_artifact_manifest( + cross_check_runtime_artifact_manifest( manifest=manifest.model_copy( update={"serving_manifest_ref": "tensor:other_manifest"} ), descriptor_tensor_schema_hash="schema-hash", tensor_names=("w",), expected_tensor_schema_hash="schema-hash", - serving_runtime_policy=policy, + runtime_artifact_policy=policy, ) @@ -209,7 +273,7 @@ def test_cross_check_serving_artifact_manifest_enforces_runtime_policy() -> None ), ], ) -def test_cross_check_serving_artifact_manifest_rejects_pinned_policy_mismatch( +def test_cross_check_runtime_artifact_manifest_rejects_pinned_policy_mismatch( 
manifest_update, policy_update, match, @@ -218,10 +282,10 @@ def test_cross_check_serving_artifact_manifest_rejects_pinned_policy_mismatch( policy = manifest.to_runtime_policy().model_copy(update=policy_update) with pytest.raises(RuntimeError, match=match): - cross_check_serving_artifact_manifest( + cross_check_runtime_artifact_manifest( manifest=manifest.model_copy(update=manifest_update), descriptor_tensor_schema_hash="schema-hash", tensor_names=("w",), expected_tensor_schema_hash="schema-hash", - serving_runtime_policy=policy, + runtime_artifact_policy=policy, ) diff --git a/tests/python/test_serving_resolver.py b/tests/python/artifact_runtime/artifact/test_resolver.py similarity index 76% rename from tests/python/test_serving_resolver.py rename to tests/python/artifact_runtime/artifact/test_resolver.py index ce023231..3c9be22f 100644 --- a/tests/python/test_serving_resolver.py +++ b/tests/python/artifact_runtime/artifact/test_resolver.py @@ -9,9 +9,9 @@ import torch import tensorcast as tc -import tensorcast.serving.resolver as resolver_mod -from tensorcast.serving.dto import PreparedServingArtifact -from tensorcast.serving.resolver import ServingArtifactResolver +import tensorcast.artifact_runtime.artifact.resolver as resolver_mod +from tensorcast.artifact_runtime.artifact.resolver import RuntimeArtifactResolver +from tensorcast.artifact_runtime.dto import PreparedRuntimeArtifact class _FakeArtifact: @@ -41,8 +41,8 @@ def _descriptor(*, include_manifest: bool = True) -> Any: ) -def _manifest(*, tensor_schema_hash: str) -> tc.ServingArtifactManifest: - return tc.ServingArtifactManifest( +def _manifest(*, tensor_schema_hash: str) -> tc.RuntimeArtifactManifest: + return tc.RuntimeArtifactManifest( framework_name="vllm", adapter_version="adapter-v1", serving_abi_version="abi-v1", @@ -56,21 +56,21 @@ def _manifest(*, tensor_schema_hash: str) -> tc.ServingArtifactManifest: ) -def test_resolve_serving_artifact_cross_checks_manifest(monkeypatch) -> None: +def 
test_resolve_runtime_artifact_cross_checks_manifest(monkeypatch) -> None: descriptor = _descriptor() artifact = _FakeArtifact(descriptor) - resolver = ServingArtifactResolver( + resolver = RuntimeArtifactResolver( manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=int( - tc.ServingArtifactManifest.model_fields["schema_version"].default + tc.RuntimeArtifactManifest.model_fields["schema_version"].default ), + open_artifact_fn=lambda _ref: artifact, ) tensor_schema_hash = resolver.compute_descriptor_tensor_schema_hash(descriptor) manifest = _manifest(tensor_schema_hash=tensor_schema_hash) - monkeypatch.setattr(resolver_mod, "open_artifact", lambda ref: artifact) monkeypatch.setattr( resolver_mod.tc_artifact_manifest, - "read_serving_artifact_manifest_tensor", + "read_runtime_artifact_manifest_tensor", lambda *_args, **_kwargs: manifest, ) @@ -82,7 +82,7 @@ def test_resolve_serving_artifact_cross_checks_manifest(monkeypatch) -> None: resolver.cross_check( resolved, expected_tensor_schema_hash=tensor_schema_hash, - serving_runtime_policy=manifest.to_runtime_policy(), + runtime_artifact_policy=manifest.to_runtime_policy(), ) is resolved ) @@ -93,15 +93,15 @@ def test_resolve_serving_artifact_cross_checks_manifest(monkeypatch) -> None: ) -def test_resolve_serving_artifact_rejects_missing_manifest_tensor() -> None: - resolver = ServingArtifactResolver( +def test_resolve_runtime_artifact_rejects_missing_manifest_tensor() -> None: + resolver = RuntimeArtifactResolver( manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=int( - tc.ServingArtifactManifest.model_fields["schema_version"].default + tc.RuntimeArtifactManifest.model_fields["schema_version"].default ), ) - with pytest.raises(RuntimeError, match="missing serving manifest tensor"): + with pytest.raises(RuntimeError, match="missing runtime manifest tensor"): resolver.read_manifest( _FakeArtifact(_descriptor(include_manifest=False)), artifact_ref="mi2:test:serving", @@ -109,19 +109,19 
@@ def test_resolve_serving_artifact_rejects_missing_manifest_tensor() -> None: def test_resolve_prepared_rejects_local_ready_only_summary() -> None: - resolver = ServingArtifactResolver( + resolver = RuntimeArtifactResolver( manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=int( - tc.ServingArtifactManifest.model_fields["schema_version"].default + tc.RuntimeArtifactManifest.model_fields["schema_version"].default ), ) - prepared = PreparedServingArtifact( + prepared = PreparedRuntimeArtifact( source_artifact_ref="disk:/model", serving_artifact_ref=None, serving_manifest_ref="tensor:manifest", representation_contract_hash="repr-hash", serving_build_digest="build-digest", - readiness="serving_local_ready", + readiness="runtime_local_ready", family="demo", tensor_schema_hash="schema-hash", ) @@ -133,35 +133,34 @@ def test_resolve_prepared_rejects_local_ready_only_summary() -> None: def test_resolve_prepared_reads_manifest_tensor(monkeypatch) -> None: descriptor = _descriptor() artifact = _FakeArtifact(descriptor) - resolver = ServingArtifactResolver( + resolver = RuntimeArtifactResolver( manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=int( - tc.ServingArtifactManifest.model_fields["schema_version"].default + tc.RuntimeArtifactManifest.model_fields["schema_version"].default ), + open_artifact_fn=lambda _ref: artifact, ) tensor_schema_hash = resolver.compute_descriptor_tensor_schema_hash(descriptor) manifest = _manifest(tensor_schema_hash=tensor_schema_hash) - prepared = PreparedServingArtifact( + prepared = PreparedRuntimeArtifact( source_artifact_ref="mi2:test:source", serving_artifact_ref="mi2:test:serving", serving_manifest_ref="tensor:manifest", representation_contract_hash="repr-hash", serving_build_digest="build-digest", - readiness="serving_published_ready", + readiness="runtime_published_ready", family="demo", tensor_schema_hash=tensor_schema_hash, ) calls = {"read_manifest": 0} - monkeypatch.setattr(resolver_mod, 
"open_artifact", lambda ref: artifact) - def _read_manifest(*_args, **_kwargs): calls["read_manifest"] += 1 return manifest monkeypatch.setattr( resolver_mod.tc_artifact_manifest, - "read_serving_artifact_manifest_tensor", + "read_runtime_artifact_manifest_tensor", _read_manifest, ) @@ -174,30 +173,30 @@ def _read_manifest(*_args, **_kwargs): def test_resolve_prepared_rejects_manifest_summary_mismatch(monkeypatch) -> None: descriptor = _descriptor() artifact = _FakeArtifact(descriptor) - resolver = ServingArtifactResolver( + resolver = RuntimeArtifactResolver( manifest_tensor_name=tc.SERVING_MANIFEST_TENSOR_NAME, schema_version=int( - tc.ServingArtifactManifest.model_fields["schema_version"].default + tc.RuntimeArtifactManifest.model_fields["schema_version"].default ), + open_artifact_fn=lambda _ref: artifact, ) tensor_schema_hash = resolver.compute_descriptor_tensor_schema_hash(descriptor) manifest = _manifest(tensor_schema_hash=tensor_schema_hash).model_copy( update={"serving_build_digest": "other-build"} ) - prepared = PreparedServingArtifact( + prepared = PreparedRuntimeArtifact( source_artifact_ref="mi2:test:source", serving_artifact_ref="mi2:test:serving", serving_manifest_ref="tensor:manifest", representation_contract_hash="repr-hash", serving_build_digest="build-digest", - readiness="serving_published_ready", + readiness="runtime_published_ready", family="demo", tensor_schema_hash=tensor_schema_hash, ) - monkeypatch.setattr(resolver_mod, "open_artifact", lambda ref: artifact) monkeypatch.setattr( resolver_mod.tc_artifact_manifest, - "read_serving_artifact_manifest_tensor", + "read_runtime_artifact_manifest_tensor", lambda *_args, **_kwargs: manifest, ) diff --git a/tests/python/test_serving_binding_runtime.py b/tests/python/artifact_runtime/binding/test_execution.py similarity index 74% rename from tests/python/test_serving_binding_runtime.py rename to tests/python/artifact_runtime/binding/test_execution.py index 868ea9a5..805c4f6b 100644 --- 
a/tests/python/test_serving_binding_runtime.py +++ b/tests/python/artifact_runtime/binding/test_execution.py @@ -5,25 +5,24 @@ from types import SimpleNamespace import tensorcast as tc -from tensorcast.serving.binding_runtime import ( - bind_serving_artifact, +from tensorcast.api._config import CollectivePolicyMode +from tensorcast.artifact_runtime.binding.execution import ( + bind_runtime_artifact, build_materialization_execution_context, - swap_serving_artifact, + swap_runtime_artifact, ) from tensorcast.types import CollectivePolicy -def test_bind_and_swap_serving_artifact_delegate_to_artifact_handles() -> None: +def test_bind_and_swap_runtime_artifact_delegate_to_artifact_handles() -> None: calls: list[tuple[str, object]] = [] class _Subset: - def bind(self, **kwargs): calls.append(("bind", kwargs)) return "binding" class _Artifact: - def subset(self, names): calls.append(("subset", tuple(names))) return _Subset() @@ -37,19 +36,25 @@ def swap(self, artifact, **kwargs): resolved = SimpleNamespace(artifact=_Artifact()) - assert bind_serving_artifact( - resolved_artifact=resolved, - tensor_names=("a", "b"), - device="cuda:0", - serving_runtime_policy="policy", - options="options", - ) == "binding" - assert swap_serving_artifact( - binding=_Binding(), - resolved_artifact=resolved, - serving_runtime_policy="policy", - options="options", - ) == "swapped" + assert ( + bind_runtime_artifact( + resolved_artifact=resolved, + tensor_names=("a", "b"), + device="cuda:0", + runtime_artifact_policy="policy", + options="options", + ) + == "binding" + ) + assert ( + swap_runtime_artifact( + binding=_Binding(), + resolved_artifact=resolved, + runtime_artifact_policy="policy", + options="options", + ) + == "swapped" + ) swapped_artifact = calls[3][1][0] assert isinstance(swapped_artifact, _Subset) @@ -59,7 +64,7 @@ def swap(self, artifact, **kwargs): "bind", { "device": "cuda:0", - "serving_runtime_policy": "policy", + "runtime_artifact_policy": "policy", "options": "options", }, 
), @@ -69,7 +74,7 @@ def swap(self, artifact, **kwargs): ( swapped_artifact, { - "serving_runtime_policy": "policy", + "runtime_artifact_policy": "policy", "options": "options", }, ), @@ -77,14 +82,13 @@ def swap(self, artifact, **kwargs): ] -def test_swap_serving_artifact_prefers_binding_target_tensor_names() -> None: +def test_swap_runtime_artifact_prefers_binding_target_tensor_names() -> None: calls: list[tuple[str, object]] = [] class _Subset: pass class _Artifact: - def subset(self, names): calls.append(("subset", tuple(names))) return _Subset() @@ -96,15 +100,18 @@ def swap(self, artifact, **kwargs): calls.append(("swap", artifact)) return "swapped" - resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("a", )) + resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("a",)) - assert swap_serving_artifact( - binding=_Binding(), - resolved_artifact=resolved, - tensor_names=("a", ), - serving_runtime_policy=None, - options=None, - ) == "swapped" + assert ( + swap_runtime_artifact( + binding=_Binding(), + resolved_artifact=resolved, + tensor_names=("a",), + runtime_artifact_policy=None, + options=None, + ) + == "swapped" + ) assert calls[0] == ( "subset", @@ -113,14 +120,13 @@ def swap(self, artifact, **kwargs): assert isinstance(calls[1][1], _Subset) -def test_swap_serving_artifact_prefers_binding_layout_tensor_order() -> None: +def test_swap_runtime_artifact_prefers_binding_layout_tensor_order() -> None: calls: list[tuple[str, object]] = [] class _Subset: pass class _Artifact: - def subset(self, names): calls.append(("subset", tuple(names))) return _Subset() @@ -147,13 +153,16 @@ def swap(self, artifact, **kwargs): resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("a", "b")) - assert swap_serving_artifact( - binding=_Binding(), - resolved_artifact=resolved, - tensor_names=("b", ), - serving_runtime_policy=None, - options=None, - ) == "swapped" + assert ( + swap_runtime_artifact( + binding=_Binding(), + resolved_artifact=resolved, 
+ tensor_names=("b",), + runtime_artifact_policy=None, + options=None, + ) + == "swapped" + ) assert calls[0] == ( "subset", @@ -181,12 +190,17 @@ def test_materialization_execution_context_builds_collective_options() -> None: assert isinstance(options, tc.GetArtifactOptions) assert options.execution_topology.collective_group is not None assert options.execution_topology.collective_group.group_id == "group-1" + assert ( + options.execution_topology.collective_policy + is CollectivePolicyMode.COLLECTIVE_FIRST + ) assert profile["collective_requested"] is True assert profile["source_locality"] == "shared_source" -def test_materialization_execution_context_disables_collective_when_unavailable( -) -> None: +def test_materialization_execution_context_disables_collective_when_unavailable() -> ( + None +): options, profile = build_materialization_execution_context( artifact_ref="mi2:test:serving", operation_scope="startup.bind", diff --git a/tests/python/test_serving_builder_binding_plan.py b/tests/python/artifact_runtime/binding/test_plan.py similarity index 97% rename from tests/python/test_serving_builder_binding_plan.py rename to tests/python/artifact_runtime/binding/test_plan.py index 6c9e2d1c..d37f3427 100644 --- a/tests/python/test_serving_builder_binding_plan.py +++ b/tests/python/artifact_runtime/binding/test_plan.py @@ -2,11 +2,11 @@ from __future__ import annotations -from tensorcast.serving.builder.binding_plan import ( +from tensorcast.artifact_runtime.binding.plan import ( lower_trace_plan_for_binding, lower_trace_plan_for_realization, ) -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, diff --git a/tests/python/test_serving_retained_binding_acquire.py b/tests/python/artifact_runtime/binding/test_retained.py similarity index 74% rename from tests/python/test_serving_retained_binding_acquire.py rename to tests/python/artifact_runtime/binding/test_retained.py index 
dceb8a97..be74b272 100644 --- a/tests/python/test_serving_retained_binding_acquire.py +++ b/tests/python/artifact_runtime/binding/test_retained.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from contextlib import contextmanager from dataclasses import replace from types import SimpleNamespace @@ -10,32 +11,44 @@ import pytest import torch -import tensorcast as tc -from tensorcast.serving.retained_binding import ( - ParsedRetainedServingBindingAuthority, - RetainedServingBindingAuthority, - RetainedServingBindingExpectedDigests, - acquire_retained_serving_binding, - acquire_retained_serving_binding_lease, - parse_retained_serving_binding_authority, +import tensorcast.retained_realization as retained_realization_module +import tensorcast.retained_realization_authority as retained_authority_module +from tensorcast.artifact_runtime.binding.retained import ( + acquire_retained_binding, + acquire_retained_binding_lease, promote_current_value_and_wait, retained_binding_acquire_mode, - retained_serving_binding_extra_from_prefetched_binding, - retained_serving_binding_extra_json, - retained_serving_binding_trusted_reservation_bytes, +) +from tensorcast.retained_realization import ( + RetainedRealizationClaim, + RetainedRealizationExpectedDigests, + parse_retained_realization_authority, + parse_retained_realization_claim, + retained_realization_claim_extra_from_handoff, + retained_realization_claim_extra_json_from_handoff, + retained_realization_claim_mode, + retained_realization_trusted_reservation_bytes, +) +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, + RetainedRealizationAuthority, +) +from tensorcast.retained_realization_authority import ( + RetainedRealizationExpectedDigests as RetainedRealizationAuthorityExpectedDigests, ) from tensorcast.types import ( BindingReservationCapability, BindingValueRef, BindingValueVerificationState, GroupRealizationAcquireRef, + PrefetchHandoff, 
PrefetchRetentionPolicy, - ServingBindingMemberRef, - ServingBindingResolvedLayout, - ServingBindingSourceRef, - ServingBindingSourceReuseDecision, - ServingBindingTarget, - ServingTopologyRef, + RealizationTarget, + RuntimeBindingMemberRef, + RuntimeBindingResolvedLayout, + RuntimeBindingSourceRef, + RuntimeBindingSourceReuseDecision, + RuntimeTopologyRef, ) @@ -45,9 +58,9 @@ def _authority( member_index: int = 0, member_count: int = 1, expires_at_ms: int | None = None, -) -> ParsedRetainedServingBindingAuthority: +) -> ParsedRetainedRealizationAuthority: suffix = member_index + 1 - member = ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id=f"member-{member_index}", member_index=member_index, member_count=member_count, @@ -70,7 +83,7 @@ def _authority( scope_digest="scope-1", expires_at_ms=expires_at_ms, ) - return ParsedRetainedServingBindingAuthority( + return ParsedRetainedRealizationAuthority( group_id="group-1", local_serving_ref=f"binding-local:binding-{suffix}:value-{suffix}", binding_value_ref=binding_ref, @@ -80,19 +93,19 @@ def _authority( device_uuid=f"gpu-{member_index}", member=member, reservation_bytes=reservation_bytes, - expected=RetainedServingBindingExpectedDigests( + expected=RetainedRealizationAuthorityExpectedDigests( target_layout_hash="layout-hash", tensor_schema_hash="schema-hash", - serving_build_digest="build-digest", + runtime_build_digest="build-digest", resolved_spec_digest="spec-digest", ), - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", ) def _authority_payload( - authority: ParsedRetainedServingBindingAuthority, + authority: ParsedRetainedRealizationAuthority, ) -> dict[str, object]: return { "group_id": authority.group_id, @@ -125,6 +138,44 @@ def _set_nested( current[path[-1]] = value +def test_serving_retained_binding_does_not_export_legacy_authority_aliases() -> None: + import tensorcast.artifact_runtime.binding.retained as retained_binding_module + + 
assert not hasattr( + retained_binding_module, "ParsedRetainedServingBindingAuthority" + ) + assert not hasattr(retained_binding_module, "RetainedServingBindingAuthority") + assert not hasattr( + retained_binding_module, "RetainedServingBindingExpectedDigests" + ) + + +def test_retained_realization_authority_module_hides_serving_aliases() -> None: + public_names = set(retained_authority_module.__all__) + + assert "ParsedRetainedServingBindingAuthority" not in public_names + assert "RetainedServingBindingAuthority" not in public_names + assert "RetainedServingBindingExpectedDigests" not in public_names + assert not hasattr( + retained_authority_module, "ParsedRetainedServingBindingAuthority" + ) + assert not hasattr(retained_authority_module, "RetainedServingBindingAuthority") + assert not hasattr( + retained_authority_module, "RetainedServingBindingExpectedDigests" + ) + + +def test_retained_realization_module_hides_prefetched_compat_helpers() -> None: + public_names = set(retained_realization_module.__all__) + + for removed_name in ( + "retained_realization_claim_extra_from_prefetched_binding", + "retained_realization_claim_extra_json", + ): + assert removed_name not in public_names + assert not hasattr(retained_realization_module, removed_name) + + def _response(*, reservation_bytes: int = 4096, lease_token: bytes = b"lease"): return SimpleNamespace( reservation_bytes=reservation_bytes, @@ -138,15 +189,15 @@ def _response(*, reservation_bytes: int = 4096, lease_token: bytes = b"lease"): ) -def _topology() -> ServingTopologyRef: - return ServingTopologyRef( +def _topology() -> RuntimeTopologyRef: + return RuntimeTopologyRef( schema_topology_digest="topology-schema", admission_topology_digest="topology-admission", ) -def _source() -> ServingBindingSourceRef: - return ServingBindingSourceRef( +def _source() -> RuntimeBindingSourceRef: + return RuntimeBindingSourceRef( source_kind="checkpoint_artifact", artifact_selection_digest="selection-digest", 
source_artifact_ref="mi2:checkpoint", @@ -155,17 +206,17 @@ def _source() -> ServingBindingSourceRef: def _target( - member: ServingBindingMemberRef, + member: RuntimeBindingMemberRef, *, - topology: ServingTopologyRef | None = None, -) -> ServingBindingTarget: + topology: RuntimeTopologyRef | None = None, +) -> RealizationTarget: resolved_topology = topology or _topology() source = _source() - source_reuse = ServingBindingSourceReuseDecision( - mode="checkpoint_to_serving", + source_reuse = RuntimeBindingSourceReuseDecision( + mode="checkpoint_to_runtime", representation_contract_hash="repr-contract", ) - resolved_layout = ServingBindingResolvedLayout( + resolved_layout = RuntimeBindingResolvedLayout( binding_layout_id="layout-1", source=source, source_reuse=source_reuse, @@ -178,7 +229,7 @@ def _target( spec_digest="spec-digest", source_schema_hash="source-schema", ) - return ServingBindingTarget( + return RealizationTarget( runtime="vllm", device="cuda:0", device_uuid="GPU-0", @@ -186,16 +237,16 @@ def _target( topology=resolved_topology, member=member, model_config_digest="model-config", - serving_build_digest="serving-build", + runtime_build_digest="serving-build", resolved_layout=resolved_layout, ) def _prefetched( - member: ServingBindingMemberRef, + member: RuntimeBindingMemberRef, *, reservation_bytes: int = 4096, -) -> tc.PrefetchedServingBinding: +) -> PrefetchHandoff: binding_ref = BindingValueRef( binding_id="binding-1", binding_layout_id="layout-1", @@ -212,7 +263,7 @@ def _prefetched( reservation_bytes=reservation_bytes, scope_digest="scope-1", ) - return tc.PrefetchedServingBinding( + return PrefetchHandoff( local_serving_ref="binding-local:binding-1:value-1", binding_value_ref=binding_ref, daemon_id="daemon-1", @@ -221,7 +272,7 @@ def _prefetched( member=member, reservation_bytes=reservation_bytes, reservation_capability=capability, - readiness="serving_local_ready", + readiness="runtime_local_ready", 
verification_state=BindingValueVerificationState.LOCAL_ONLY, ) @@ -252,11 +303,11 @@ def ensure_client(self): return self.client -def test_acquire_retained_serving_binding_lease_releases_unrestored_lease_on_context_exit(): +def test_acquire_retained_binding_lease_releases_unrestored_lease_on_context_exit(): authority = _authority() client = _Client(_response()) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, runtime=_Runtime(client), caller_pid=123, @@ -276,11 +327,11 @@ def test_acquire_retained_serving_binding_lease_releases_unrestored_lease_on_con assert lease.release_contract.released is True -def test_acquire_retained_serving_binding_uses_authority(): +def test_acquire_retained_binding_uses_authority(): authority = _authority() client = _Client(_response()) - with acquire_retained_serving_binding( + with acquire_retained_binding( authority=authority, runtime=_Runtime(client), caller_pid=456, @@ -298,7 +349,7 @@ def test_acquire_retained_binding_rejects_expired_capability_before_daemon_call( with ( pytest.raises(ValueError, match="reservation_capability has expired"), - acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)), + acquire_retained_binding_lease(authority, runtime=_Runtime(client)), ): pass @@ -310,7 +361,7 @@ def test_retained_binding_debug_status_tracks_capability_ttl_and_lifecycle(): authority = _authority(expires_at_ms=4_102_444_800_000) client = _Client(_response()) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease: acquired_status = lease.debug_status() @@ -318,7 +369,7 @@ def test_retained_binding_debug_status_tracks_capability_ttl_and_lifecycle(): assert acquired_status["state"] == "acquired" assert acquired_status["reservation_capability_id"] == "capability-1" assert acquired_status["reservation_expires_at_ms"] == 4_102_444_800_000 - assert acquired_status["readiness"] == 
"serving_local_ready" + assert acquired_status["readiness"] == "runtime_local_ready" assert acquired_status["verification_state"] == "local_only" assert acquired_status["lease_token_present"] is True assert acquired_status["release_policy"] == ( @@ -361,8 +412,8 @@ def test_retained_prefetch_retention_policy_round_trips_ttl_and_idle_retire(): assert round_tripped.allow_acquire_after_creator_exit is True -def test_acquire_retained_serving_binding_acquires_local_ready(monkeypatch): - member = ServingBindingMemberRef( +def test_acquire_retained_binding_acquires_local_ready(monkeypatch): + member = RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=1, @@ -375,7 +426,7 @@ def test_acquire_retained_serving_binding_acquires_local_ready(monkeypatch): store_api, "device_uuid_for", lambda device_index: f"gpu-{device_index}" ) - with acquire_retained_serving_binding( + with acquire_retained_binding( local_serving_ref="binding-local:binding-1:value-1", target_device=torch.device("cuda:3"), expected_member=member, @@ -384,7 +435,7 @@ def test_acquire_retained_serving_binding_acquires_local_ready(monkeypatch): runtime=_Runtime(client), caller_pid=789, ) as lease: - assert lease.authority.readiness == "serving_local_ready" + assert lease.authority.readiness == "runtime_local_ready" assert ( client.acquire_calls[0]["local_serving_ref"] @@ -404,7 +455,7 @@ def fail_restore(**_kwargs): with ( pytest.raises(RuntimeError, match="restore failed"), - acquire_retained_serving_binding_lease( + acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease, ): @@ -420,7 +471,7 @@ def test_attached_close_releases_once_after_successful_restore(): authority = _authority() client = _Client(_response()) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease: attached = lease.restore( @@ -437,7 +488,7 @@ def test_transfer_to_runtime_moves_close_ownership(): authority = 
_authority() client = _Client(_response()) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease: attached = lease.restore( @@ -463,7 +514,7 @@ def test_restored_lease_releases_on_context_exit_when_not_transferred(): with ( pytest.raises(RuntimeError, match="attach failed"), - acquire_retained_serving_binding_lease( + acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease, ): @@ -480,7 +531,7 @@ def test_retained_binding_lifecycle_rejects_invalid_transitions(): authority = _authority() client = _Client(_response()) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease: lease.close() @@ -491,7 +542,7 @@ def test_retained_binding_lifecycle_rejects_invalid_transitions(): ) client = _Client(_response()) - with acquire_retained_serving_binding_lease( + with acquire_retained_binding_lease( authority, runtime=_Runtime(client) ) as lease: attached = lease.restore( @@ -508,7 +559,7 @@ def test_retained_binding_lifecycle_rejects_invalid_transitions(): attached.transfer_to_runtime() -def test_acquire_retained_serving_binding_lease_rejects_mismatched_acquire_response(): +def test_acquire_retained_binding_lease_rejects_mismatched_acquire_response(): authority = _authority() response = _response() response.current_value.binding_value_id = "other-value" @@ -516,20 +567,20 @@ def test_acquire_retained_serving_binding_lease_rejects_mismatched_acquire_respo with ( pytest.raises(RuntimeError, match="different binding value"), - acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)), + acquire_retained_binding_lease(authority, runtime=_Runtime(client)), ): pass assert client.released_tokens == [b"lease"] -def test_acquire_retained_serving_binding_lease_releases_mismatched_reservation_response(): +def test_acquire_retained_binding_lease_releases_mismatched_reservation_response(): 
authority = _authority(reservation_bytes=4096) client = _Client(_response(reservation_bytes=8192)) with ( pytest.raises(RuntimeError, match="reservation byte mismatch"), - acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)), + acquire_retained_binding_lease(authority, runtime=_Runtime(client)), ): pass @@ -587,7 +638,7 @@ def test_parse_retained_binding_authority_rejects_inconsistent_authority( } with pytest.raises(ValueError, match=match): - parse_retained_serving_binding_authority(extra) + parse_retained_realization_authority(extra) def test_parse_retained_binding_authority_rejects_member_group_mismatch(): @@ -604,13 +655,13 @@ def test_parse_retained_binding_authority_rejects_member_group_mismatch(): } with pytest.raises(ValueError, match="member_ref.group_id"): - parse_retained_serving_binding_authority(extra) + parse_retained_realization_authority(extra) def test_parse_retained_binding_authority_requires_published_artifact_scope(): authority = replace( _authority(), - readiness="serving_published_ready", + readiness="runtime_published_ready", serving_artifact_id=None, ) extra = { @@ -621,16 +672,16 @@ def test_parse_retained_binding_authority_requires_published_artifact_scope(): } with pytest.raises(ValueError, match="serving_artifact_id"): - parse_retained_serving_binding_authority(extra) + parse_retained_realization_authority(extra) def test_acquire_retained_binding_rejects_reserved_authority_before_daemon_call(): - authority = replace(_authority(), readiness="serving_reserved") + authority = replace(_authority(), readiness="runtime_reserved") client = _Client(_response()) with ( - pytest.raises(ValueError, match="serving_reserved"), - acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)), + pytest.raises(ValueError, match="runtime_reserved"), + acquire_retained_binding_lease(authority, runtime=_Runtime(client)), ): pass @@ -653,7 +704,7 @@ def 
test_acquire_retained_binding_requires_group_publish_wait_before_attach(): with ( pytest.raises(ValueError, match="wait for group publish"), - acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)), + acquire_retained_binding_lease(authority, runtime=_Runtime(client)), ): pass @@ -673,30 +724,30 @@ def test_acquire_retained_binding_passes_group_publish_wait_authority(): authority = replace(_authority(), group_realization_acquire=group_acquire) client = _Client(_response()) - with acquire_retained_serving_binding_lease(authority, runtime=_Runtime(client)): + with acquire_retained_binding_lease(authority, runtime=_Runtime(client)): pass assert client.acquire_calls[0]["group_realization_acquire"] == group_acquire assert client.released_tokens == [b"lease"] -def test_retained_binding_public_helpers_build_extra_from_prefetched_binding(): +def test_retained_binding_public_helpers_build_extra_from_handoff(): member = _authority().member prefetched = _prefetched(member, reservation_bytes=8192) target = _target(member) - extra = retained_serving_binding_extra_from_prefetched_binding( - prefetched=prefetched, + extra = retained_realization_claim_extra_from_handoff( + handoff=prefetched, target=target, expected_member=member, ) - authority = parse_retained_serving_binding_authority(extra) + authority = parse_retained_realization_authority(extra) assert "retained_binding_acquire" in extra assert retained_binding_acquire_mode(extra) == "external" assert isinstance(extra["retained_binding_acquire"]["authority"], dict) assert ( - RetainedServingBindingAuthority.model_validate( + RetainedRealizationAuthority.model_validate( extra["retained_binding_acquire"]["authority"] ).trusted_reservation_bytes == 8192 @@ -705,25 +756,95 @@ def test_retained_binding_public_helpers_build_extra_from_prefetched_binding(): assert authority.reservation_bytes == 8192 assert authority.expected.target_layout_hash == "target-layout-hash" assert authority.expected.tensor_schema_hash 
== "tensor-schema" - assert authority.expected.serving_build_digest == "serving-build" + assert authority.expected.runtime_build_digest == "serving-build" assert authority.expected.resolved_spec_digest == "spec-digest" - assert retained_serving_binding_trusted_reservation_bytes(extra) == 8192 + assert retained_realization_trusted_reservation_bytes(extra) == 8192 assert ( - retained_serving_binding_trusted_reservation_bytes( + retained_realization_trusted_reservation_bytes( SimpleNamespace(model_loader_extra_config=extra) ) == 8192 ) - assert '"mode":"external"' in retained_serving_binding_extra_json( - prefetched=prefetched, + assert '"mode":"external"' in retained_realization_claim_extra_json_from_handoff( + handoff=prefetched, target=target, expected_member=member, ) - assert '"retained_binding_acquire"' in retained_serving_binding_extra_json( - prefetched=prefetched, + assert '"retained_binding_acquire"' in retained_realization_claim_extra_json_from_handoff( + handoff=prefetched, + target=target, + expected_member=member, + ) + + +def test_retained_realization_claim_helpers_use_primary_authority_contract(): + member = _authority().member + handoff = _prefetched(member, reservation_bytes=8192) + target = _target(member) + + extra = retained_realization_claim_extra_from_handoff( + handoff=handoff, target=target, expected_member=member, ) + claim = parse_retained_realization_claim(extra, expected_member=member) + authority = parse_retained_realization_authority(extra, expected_member=member) + + assert isinstance(claim, RetainedRealizationClaim) + assert claim.authority == authority + assert claim.as_authority() == authority + assert parse_retained_realization_authority(extra, expected_member=member) == ( + authority + ) + assert claim.group_id == authority.group_id + assert claim.local_ref == authority.local_serving_ref + assert claim.binding_value_ref == authority.binding_value_ref + assert claim.reservation_capability == authority.reservation_capability + assert 
claim.daemon_id == authority.daemon_id + assert claim.daemon_session_id == authority.daemon_session_id + assert claim.device_uuid == authority.device_uuid + assert claim.member == member + assert claim.reservation_bytes == 8192 + assert isinstance(claim.expected, RetainedRealizationExpectedDigests) + assert claim.expected.tensor_schema_hash == "tensor-schema" + assert claim.readiness == "runtime_local_ready" + assert claim.verification_state == "local_only" + assert claim.serving_artifact_id == authority.serving_artifact_id + assert claim.group_realization_acquire == authority.group_realization_acquire + assert retained_realization_claim_mode(extra) == "external" + assert retained_realization_trusted_reservation_bytes(extra) == 8192 + assert ( + retained_realization_trusted_reservation_bytes( + SimpleNamespace(model_loader_extra_config=extra) + ) + == 8192 + ) + assert retained_realization_claim_extra_from_handoff( + handoff=handoff, + target=target, + expected_member=member, + ) == extra + assert json.loads( + retained_realization_claim_extra_json_from_handoff( + handoff=handoff, + target=target, + expected_member=member, + ) + ) == extra + + +def test_retained_realization_claim_trusted_bytes_fail_closed_on_mismatch(): + payload = _authority_payload(_authority()) + _set_nested(payload, ("reservation_capability", "reservation_bytes"), 8192) + extra = { + "retained_binding_acquire": { + "mode": "external", + "authority": payload, + }, + } + + with pytest.raises(ValueError, match="reservation_bytes"): + retained_realization_trusted_reservation_bytes(extra) def test_retained_binding_authority_set_selects_expected_member(): @@ -747,7 +868,7 @@ def test_retained_binding_authority_set_selects_expected_member(): }, } - selected = parse_retained_serving_binding_authority( + selected = parse_retained_realization_authority( extra, expected_member=authority1.member, ) @@ -755,7 +876,7 @@ def test_retained_binding_authority_set_selects_expected_member(): assert selected.member 
== authority1.member assert selected.reservation_bytes == 8192 assert ( - retained_serving_binding_trusted_reservation_bytes( + retained_realization_trusted_reservation_bytes( extra, expected_member=authority1.member, ) @@ -777,7 +898,7 @@ def test_retained_binding_authority_set_requires_expected_member(): } with pytest.raises(ValueError, match="expected serving member"): - parse_retained_serving_binding_authority(extra) + parse_retained_realization_authority(extra) def test_retained_binding_extra_preserves_group_realization_acquire(): @@ -797,12 +918,12 @@ def test_retained_binding_extra_preserves_group_realization_acquire(): } ) - extra = retained_serving_binding_extra_from_prefetched_binding( - prefetched=prefetched, + extra = retained_realization_claim_extra_from_handoff( + handoff=prefetched, target=target, expected_member=member, ) - authority = parse_retained_serving_binding_authority(extra) + authority = parse_retained_realization_authority(extra) assert authority.group_realization_acquire is not None assert authority.group_realization_acquire.transaction_id == "txn-1" @@ -814,8 +935,8 @@ def test_retained_binding_extra_rejects_unexpected_member(): unexpected = member.model_copy(update={"member_id": "other"}) with pytest.raises(ValueError, match="does not match expected placement"): - retained_serving_binding_extra_from_prefetched_binding( - prefetched=_prefetched(member), + retained_realization_claim_extra_from_handoff( + handoff=_prefetched(member), target=_target(member), expected_member=unexpected, ) diff --git a/tests/python/test_serving_replica_publication.py b/tests/python/artifact_runtime/publication/test_replica.py similarity index 78% rename from tests/python/test_serving_replica_publication.py rename to tests/python/artifact_runtime/publication/test_replica.py index 28572b81..9771248c 100644 --- a/tests/python/test_serving_replica_publication.py +++ b/tests/python/artifact_runtime/publication/test_replica.py @@ -9,18 +9,24 @@ import pytest 
-import tensorcast.serving._runtime_impl.lifecycle as integration_mod -from tensorcast.serving.config import ServingConfig -from tensorcast.serving.errors import ReplicaPublicationError -from tensorcast.serving.hosts import IntegrationHost -from tensorcast.serving.policy import ServingArtifactLocator -from tensorcast.serving.runtime import RequestContext, ServingRuntimeSession -from tensorcast.serving.runtime_attachment import ( +import tensorcast.artifact_runtime.lifecycle as integration_mod +from tensorcast.artifact_runtime.attachment import ( RuntimeAttachment, RuntimeBindingState, RuntimeBindingView, ) -from tensorcast.serving.runtime_view import ( +from tensorcast.artifact_runtime.errors import ReplicaPublicationError +from tensorcast.artifact_runtime.intent import RuntimeRequestContext +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.policy import RuntimePolicy +from tensorcast.artifact_runtime.publication.actions import ( + project_runtime_replica_publication_state, + publish_runtime_replica, + retire_runtime_replica, + runtime_replica_publication_settings, +) +from tensorcast.artifact_runtime.reload import reload_runtime_attachment +from tensorcast.artifact_runtime.view import ( PublishedReplicaProjection, RuntimeWorkerView, ) @@ -100,18 +106,56 @@ def publish_replica_operation(self) -> _Operation: raise RuntimeError("publish failed") -def _session( +def _settings( + config: dict[str, object] | None = None, +): + return runtime_replica_publication_settings(config) + + +def _publish( + attachment: RuntimeAttachment, config: dict[str, object] | None = None, *, profile_sink: object | None = None, -) -> ServingRuntimeSession: - return ServingRuntimeSession.from_config( - ServingConfig.from_mapping(config), - host=IntegrationHost( - framework=cast(Any, object()), - placement=cast(Any, object()), +) -> RuntimeAttachment: + settings = _settings(config or {"replica_publication": {"mode": "required"}}) + return 
publish_runtime_replica( + current_attachment=attachment, + policy=settings.policy, + ensure_runtime_initialized=settings.ensure_runtime_initialized, + profile_sink=cast(Any, profile_sink), + ) + + +def _retire( + attachment: RuntimeAttachment, + config: dict[str, object] | None = None, + *, + reason: str = "retire", + drain_timeout_s: float | None = None, + profile_sink: object | None = None, +) -> RuntimeAttachment: + settings = _settings(config or {"replica_publication": {"mode": "required"}}) + return retire_runtime_replica( + current_attachment=attachment, + reason=reason, + drain_timeout_s=drain_timeout_s, + default_drain_timeout_s=settings.drain_timeout_s, + ensure_runtime_initialized=settings.ensure_runtime_initialized, + profile_sink=cast(Any, profile_sink), + ) + + +def _reload(attachment: RuntimeAttachment) -> RuntimeAttachment: + return reload_runtime_attachment( + current_attachment=attachment, + artifact_locator=ArtifactLocator.artifact_ref("mi2:next"), + policy=RuntimePolicy(), + runtime_host=cast(Any, object()), + runtime_context=RuntimeRequestContext(), + ensure_runtime_initialized=lambda: pytest.fail( + "active publication rejection must precede runtime init" ), - profile_sink=profile_sink, ) @@ -132,7 +176,7 @@ def _attachment( representation_contract_hash="repr-hash", tensor_schema_hash="schema-hash", binding_value_ref=binding_value_ref, - readiness="serving", + readiness="runtime_ready", ) state = RuntimeBindingState( binding=binding, @@ -173,8 +217,8 @@ def _with_published_replica( return replace(attachment, view=replace(attachment.view, endpoint=endpoint)) -def test_serving_config_parses_replica_publication_policy() -> None: - config = ServingConfig.from_mapping( +def test_runtime_config_parses_replica_publication_policy() -> None: + settings = _settings( { "replica_publication": { "mode": "REQUIRED", @@ -184,11 +228,12 @@ def test_serving_config_parses_replica_publication_policy() -> None: }, } ) + policy = settings.policy - assert 
config.replica_publication.mode == "required" - assert config.replica_publication.trigger == "after_vllm_ready" - assert config.replica_publication.timeout_s == 5 - assert config.replica_publication.drain_timeout_s == 7 + assert policy.mode == "required" + assert policy.trigger == "after_vllm_ready" + assert policy.timeout_s == 5 + assert policy.drain_timeout_s == 7 @pytest.mark.parametrize( @@ -205,11 +250,11 @@ def test_serving_config_parses_replica_publication_policy() -> None: }, ], ) -def test_serving_config_rejects_invalid_replica_publication_policy( +def test_runtime_config_rejects_invalid_replica_publication_policy( payload: dict[str, object], ) -> None: with pytest.raises(ValueError): - ServingConfig.from_mapping({"replica_publication": payload}) + _settings({"replica_publication": payload}) def test_publish_current_replica_rejects_local_ready_attachment( @@ -220,21 +265,21 @@ def test_publish_current_replica_rejects_local_ready_attachment( "ensure_initialized", lambda self: None, ) - session = _session( - { - "replica_publication": { - "mode": "required", - "drain_timeout_s": 3, - }, - } - ) attachment = _attachment( _PublicationBinding(), serving_artifact_ref=None, ) with pytest.raises(ReplicaPublicationError, match="artifact-backed"): - session.publish_current_replica(current_attachment=attachment) + _publish( + attachment, + { + "replica_publication": { + "mode": "required", + "drain_timeout_s": 3, + }, + }, + ) def test_publish_current_replica_rejects_missing_publication_capability( @@ -245,18 +290,15 @@ def test_publish_current_replica_rejects_missing_publication_capability( "ensure_initialized", lambda self: None, ) - session = _session( - { - "replica_publication": { - "mode": "required", - "drain_timeout_s": 3, - }, - } - ) - with pytest.raises(ReplicaPublicationError, match="publish_replica"): - session.publish_current_replica( - current_attachment=_attachment(_MissingPublicationCapabilityBinding()) + _publish( + 
_attachment(_MissingPublicationCapabilityBinding()), + { + "replica_publication": { + "mode": "required", + "drain_timeout_s": 3, + }, + }, ) @@ -270,10 +312,9 @@ def test_publish_current_replica_rejects_artifact_scope_mismatch( ) binding = _PublicationBinding() binding.artifact_id = "mi2:other:serving" - session = _session({"replica_publication": {"mode": "required"}}) with pytest.raises(ReplicaPublicationError, match="does not match"): - session.publish_current_replica(current_attachment=_attachment(binding)) + _publish(_attachment(binding), {"replica_publication": {"mode": "required"}}) def test_publish_current_replica_returns_published_projection( @@ -288,9 +329,8 @@ def test_publish_current_replica_returns_published_projection( ) binding = _PublicationBinding() attachment = _attachment(binding) - session = _session({"replica_publication": {"mode": "required"}}) - published = session.publish_current_replica(current_attachment=attachment) + published = _publish(attachment, {"replica_publication": {"mode": "required"}}) projection = published.view.endpoint.weight_version.published_replica assert projection is not None @@ -362,14 +402,13 @@ def test_project_current_replica_publication_state_returns_typed_projection( ) binding = _PublicationBinding() attachment = _attachment(binding) - session = _session({"replica_publication": {"mode": "required"}}) - publishing = session.project_current_replica_publication_state( + publishing = project_runtime_replica_publication_state( current_attachment=attachment, state="publishing", reason="after_vllm_ready", ) - published = session.publish_current_replica(current_attachment=publishing) + published = _publish(publishing, {"replica_publication": {"mode": "required"}}) pending = publishing.view.endpoint.weight_version.published_replica assert pending is not None @@ -405,10 +444,9 @@ def test_publish_current_replica_error_carries_failed_projection( lambda self: None, ) attachment = _attachment(_FailingPublicationBinding()) - 
session = _session({"replica_publication": {"mode": "required"}}) with pytest.raises(ReplicaPublicationError) as raised: - session.publish_current_replica(current_attachment=attachment) + _publish(attachment, {"replica_publication": {"mode": "required"}}) failed = raised.value.attachment assert isinstance(failed, RuntimeAttachment) @@ -428,19 +466,18 @@ def test_publish_and_retire_emit_profile_metrics( ) events: list[dict[str, Any]] = [] binding = _PublicationBinding() - session = _session( - { - "replica_publication": { - "mode": "required", - } - }, - profile_sink=events.append, - ) + config = { + "replica_publication": { + "mode": "required", + } + } - published = session.publish_current_replica(current_attachment=_attachment(binding)) - retired = session.retire_current_replica( - current_attachment=published, + published = _publish(_attachment(binding), config, profile_sink=events.append) + retired = _retire( + published, + config, reason="shutdown", + profile_sink=events.append, ) assert retired.view.endpoint.weight_version.published_replica is not None @@ -465,10 +502,9 @@ def test_publish_current_replica_is_idempotent_for_matching_active_projection( ) binding = _PublicationBinding() attachment = _attachment(binding) - session = _session({"replica_publication": {"mode": "required"}}) - published = session.publish_current_replica(current_attachment=attachment) - replayed = session.publish_current_replica(current_attachment=published) + published = _publish(attachment, {"replica_publication": {"mode": "required"}}) + replayed = _publish(published, {"replica_publication": {"mode": "required"}}) assert replayed is published assert binding.publish_calls == 1 @@ -483,12 +519,14 @@ def test_publish_current_replica_rejects_mismatched_active_projection( lambda self: None, ) binding = _PublicationBinding() - session = _session({"replica_publication": {"mode": "required"}}) - published = session.publish_current_replica(current_attachment=_attachment(binding)) + 
published = _publish( + _attachment(binding), + {"replica_publication": {"mode": "required"}}, + ) binding.published_lease_id = "lease-2" with pytest.raises(ReplicaPublicationError, match="does not match"): - session.publish_current_replica(current_attachment=published) + _publish(published, {"replica_publication": {"mode": "required"}}) def test_publish_current_replica_rejects_stale_publish_result( @@ -499,12 +537,11 @@ def test_publish_current_replica_rejects_stale_publish_result( "ensure_initialized", lambda self: None, ) - session = _session({"replica_publication": {"mode": "required"}}) binding = _PublicationBinding(seal_generation=2) attachment = _attachment(binding) with pytest.raises(ReplicaPublicationError, match="stale") as raised: - session.publish_current_replica(current_attachment=attachment) + _publish(attachment, {"replica_publication": {"mode": "required"}}) stale = raised.value.attachment assert isinstance(stale, RuntimeAttachment) @@ -532,10 +569,9 @@ def test_publish_current_replica_rejects_binding_value_scope_mismatch( seal_generation=1, serving_artifact_id="mi2:test:serving", ) - session = _session({"replica_publication": {"mode": "required"}}) with pytest.raises(ReplicaPublicationError, match="stale"): - session.publish_current_replica(current_attachment=_attachment(binding)) + _publish(_attachment(binding), {"replica_publication": {"mode": "required"}}) def test_retire_current_replica_is_idempotent_for_unpublished_attachment( @@ -546,10 +582,11 @@ def test_retire_current_replica_is_idempotent_for_unpublished_attachment( "ensure_initialized", lambda self: None, ) - session = _session({"replica_publication": {"mode": "required"}}) attachment = _attachment(_PublicationBinding()) - assert session.retire_current_replica(current_attachment=attachment) is attachment + assert ( + _retire(attachment, {"replica_publication": {"mode": "required"}}) is attachment + ) def test_runtime_binding_state_close_retires_binding_only_publication() -> None: @@ 
-588,17 +625,16 @@ def test_retire_current_replica_handles_binding_only_publication( binding = _PublicationBinding() binding.published_lease_id = lease_id binding.published_replica_id = replica_id - session = _session( - { - "replica_publication": { - "mode": "required", - "drain_timeout_s": 3, - }, - } - ) + config = { + "replica_publication": { + "mode": "required", + "drain_timeout_s": 3, + }, + } - retired = session.retire_current_replica( - current_attachment=_attachment(binding), + retired = _retire( + _attachment(binding), + config, reason="reload", ) @@ -622,14 +658,12 @@ def test_retire_current_replica_refreshes_stale_terminal_projection( binding = _PublicationBinding() binding.published_lease_id = "lease-orphan" binding.published_replica_id = "replica-orphan" - session = _session( - { - "replica_publication": { - "mode": "required", - "drain_timeout_s": 3, - }, - } - ) + config = { + "replica_publication": { + "mode": "required", + "drain_timeout_s": 3, + }, + } attachment = _with_published_replica( _attachment(binding), state="retired", @@ -637,8 +671,9 @@ def test_retire_current_replica_refreshes_stale_terminal_projection( replica_id=None, ) - retired = session.retire_current_replica( - current_attachment=attachment, + retired = _retire( + attachment, + config, reason="reload", ) @@ -662,21 +697,20 @@ def test_retire_current_replica_terminalizes_publishing_projection( binding = _PublicationBinding() binding.published_lease_id = "lease-1" binding.published_replica_id = "replica-1" - session = _session( - { - "replica_publication": { - "mode": "required", - "drain_timeout_s": 3, - }, - } - ) + config = { + "replica_publication": { + "mode": "required", + "drain_timeout_s": 3, + }, + } publishing = _with_published_replica( _attachment(binding), state="publishing", ) - retired = session.retire_current_replica( - current_attachment=publishing, + retired = _retire( + publishing, + config, reason="reload", ) @@ -696,18 +730,17 @@ def 
test_retire_current_replica_updates_projection( lambda self: None, ) binding = _PublicationBinding() - session = _session( - { - "replica_publication": { - "mode": "required", - "drain_timeout_s": 3, - }, - } - ) - published = session.publish_current_replica(current_attachment=_attachment(binding)) + config = { + "replica_publication": { + "mode": "required", + "drain_timeout_s": 3, + }, + } + published = _publish(_attachment(binding), config) - retired = session.retire_current_replica( - current_attachment=published, + retired = _retire( + published, + config, reason="reload", ) @@ -726,18 +759,13 @@ def test_reload_rejects_active_published_replica( "ensure_initialized", lambda self: None, ) - session = _session({"replica_publication": {"mode": "required"}}) - published = session.publish_current_replica( - current_attachment=_attachment(_PublicationBinding()) + published = _publish( + _attachment(_PublicationBinding()), + {"replica_publication": {"mode": "required"}}, ) with pytest.raises(ReplicaPublicationError, match="retiring"): - session.reload( - current_attachment=published, - artifact_locator=ServingArtifactLocator.artifact_ref("mi2:next"), - policy=None, - context=RequestContext(), - ) + _reload(published) @pytest.mark.parametrize("projection_state", [None, "retired"]) @@ -761,12 +789,6 @@ def test_reload_rejects_binding_lease_without_active_projection( lease_id="lease-orphan", replica_id="replica-orphan", ) - session = _session({"replica_publication": {"mode": "required"}}) with pytest.raises(ReplicaPublicationError, match="retire"): - session.reload( - current_attachment=attachment, - artifact_locator=ServingArtifactLocator.artifact_ref("mi2:next"), - policy=None, - context=RequestContext(), - ) + _reload(attachment) diff --git a/tests/python/test_serving_recipe_build_session.py b/tests/python/artifact_runtime/recipe/test_build_session.py similarity index 92% rename from tests/python/test_serving_recipe_build_session.py rename to 
tests/python/artifact_runtime/recipe/test_build_session.py index 53ff206b..aa9d7a46 100644 --- a/tests/python/test_serving_recipe_build_session.py +++ b/tests/python/artifact_runtime/recipe/test_build_session.py @@ -5,27 +5,31 @@ import torch from torch import nn -from tensorcast.serving.dto import ServingPlacement -from tensorcast.serving.recipe_build import ( +from tensorcast.artifact_runtime.dto import RuntimePlacement +from tensorcast.artifact_runtime.recipe.build import ( COMPILED_RECIPE_MEMORY_CACHE, DEFAULT_RECIPE_BUILD_MEMORY_CACHE_ENTRIES, TRACE_PLAN_MEMORY_CACHE, RecipeBuildCacheConfig, RecipeBuildMemoryCache, RecipeBuildSession, - ServingBindingPlan, + RuntimeBindingPlan, compute_recipe_cache_key, compute_trace_cache_key, ) -from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef +from tensorcast.types import ( + RuntimeBindingMemberRef, + RuntimeSupportLevel, + RuntimeTopologyRef, +) def _identity(**updates): - topology = ServingTopologyRef( + topology = RuntimeTopologyRef( schema_topology_digest="topology-a", logical_topology_ref="tensorcast://topology/a", ) - member = ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id="dp0:pp0:tp0", member_index=0, member_count=1, @@ -49,7 +53,7 @@ def _identity(**updates): "placement": {"tp_rank": 0}, } payload.update(updates) - return ServingBindingPlan(**payload) + return RuntimeBindingPlan(**payload) def test_recipe_build_session_keys_track_framework_and_placement(): @@ -132,8 +136,8 @@ def test_recipe_build_session_owns_cache_io(monkeypatch): session = RecipeBuildSession(_identity()) calls = [] - import tensorcast.serving.builder.recipe_cache as recipe_cache - import tensorcast.serving.builder.trace_cache as trace_cache + import tensorcast.artifact_runtime.recipe.cache as recipe_cache + import tensorcast.artifact_runtime.recipe.trace_cache as trace_cache monkeypatch.setattr( trace_cache, @@ -285,7 +289,7 @@ def test_default_recipe_build_memory_caches_are_bounded(): def 
test_recipe_build_session_owns_compile_identity_and_cached_rebind(): session = RecipeBuildSession(_identity(tp_rank=2, tp_world_size=4)) - serving_facts = type( + runtime_facts = type( "Facts", (), { @@ -296,7 +300,7 @@ def test_recipe_build_session_owns_compile_identity_and_cached_rebind(): }, )() - identity = session.compile_identity(serving_facts=serving_facts) + identity = session.compile_identity(runtime_facts=runtime_facts) assert identity.model_id == "model" assert identity.tp_rank == 2 @@ -314,27 +318,27 @@ class _Catalog: canonical_index_hash: str # Use a dataclass recipe so rebind_cached_recipe_template can preserve type - # through dataclasses.replace, matching the real CompiledServingRecipe. + # through dataclasses.replace, matching the real CompiledRuntimeRecipe. import tensorcast as tc - from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, + from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, + TensorcastRuntimeFacts, TensorcastSemanticValidationSpec, - TensorcastServingFacts, TensorSchemaEntry, realization_plan_digest, target_tensor_schema_hash, ) - from tensorcast.serving.builder.trace_ir import TracePlan + from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan - real_recipe = CompiledServingRecipe( + real_recipe = CompiledRuntimeRecipe( compile_key="old", source_artifact_ref="old", source_metadata_fingerprint="old", - serving_facts=TensorcastServingFacts( + runtime_facts=TensorcastRuntimeFacts( framework_name="vllm", adapter_version="adapter-v1", serving_abi_version="abi-v1", - support_level=tc.ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, runtime_only_tensor_names=(), process_after_load_class=tc.FinalizeClass.RUNTIME_ONLY, post_bind_finalize_class=tc.FinalizeClass.RUNTIME_ONLY, @@ -358,7 +362,7 @@ class _Catalog: compile_payload = identity.compile_payload( source_artifact_ref="mi2:test:source", 
source_metadata_fingerprint="meta-b", - serving_facts=real_recipe.serving_facts, + runtime_facts=real_recipe.runtime_facts, tensor_schema=( TensorSchemaEntry( name="w", @@ -411,7 +415,7 @@ class _Catalog: assert rebound.binding_plan.realization_plan_digest == realization_plan_digest( rebound.realization_plan_proto ) - assert rebound.binding_plan.serving_facts is rebound.serving_facts + assert rebound.binding_plan.runtime_facts is rebound.runtime_facts assert rebound.binding_plan.trace_plan is rebound.trace_plan assert rebound.binding_plan.tensor_schema == rebound.tensor_schema assert rebound.binding_plan.source_hull == rebound.source_hull @@ -433,14 +437,14 @@ class _Catalog: resolved_compile_payload = rebound.binding_plan.compile_payload( source_artifact_ref=rebound.source_artifact_ref, source_metadata_fingerprint=rebound.source_metadata_fingerprint, - serving_facts=rebound.serving_facts, + runtime_facts=rebound.runtime_facts, tensor_schema=rebound.tensor_schema, semantic_validation_spec=rebound.semantic_validation_spec, ) identity_compile_payload = identity.compile_payload( source_artifact_ref=rebound.source_artifact_ref, source_metadata_fingerprint=rebound.source_metadata_fingerprint, - serving_facts=rebound.serving_facts, + runtime_facts=rebound.runtime_facts, tensor_schema=rebound.tensor_schema, semantic_validation_spec=rebound.semantic_validation_spec, ) @@ -456,7 +460,7 @@ class _Catalog: def test_recipe_build_session_owns_recipe_metadata_collection(): import tensorcast as tc - from tensorcast.serving.builder.compiler import ( + from tensorcast.artifact_runtime.recipe.compiler import ( TensorcastSemanticValidationSpec, ) @@ -482,7 +486,7 @@ def serving_abi_version(self, model_config): def support_level(self, model, model_config): assert model_config == "model-config" - return tc.ServingSupportLevel.BUILDER_PUBLICATION_READY + return RuntimeSupportLevel.BUILDER_PUBLICATION_READY def runtime_only_tensor_names(self, model): return ("runtime_only",) @@ -500,14 
+504,14 @@ def semantic_probes(self, model, model_config): model = _Model() adapter = _Adapter() - facts = session.collect_serving_facts(model, "model-config", adapter) + facts = session.collect_runtime_facts(model, "model-config", adapter) assert facts.framework_name == "fakefw" assert facts.runtime_only_tensor_names == ("runtime_only",) schema = session.collect_tensor_schema( model, runtime_only_tensor_names=facts.runtime_only_tensor_names, - is_reserved_serving_tensor_name=lambda name: name.startswith( + is_reserved_runtime_tensor_name=lambda name: name.startswith( "__tensorcast_meta__." ), ) @@ -536,8 +540,8 @@ def semantic_probes(self, model, model_config): def test_recipe_build_session_build_recipe_runs_core_orchestration(): import tensorcast as tc - from tensorcast.serving.builder.trace_ir import CopyPlanEntry, TracePlan - from tensorcast.serving.source_catalog import ( + from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, TracePlan + from tensorcast.artifact_runtime.source import ( SourceCatalog, SourceTensorMeta, compute_source_metadata_fingerprint, @@ -567,7 +571,7 @@ def serving_abi_version(self, model_config): def support_level(self, model, model_config): assert model_config.model == "fake-model" - return tc.ServingSupportLevel.BUILDER_PUBLICATION_READY + return RuntimeSupportLevel.BUILDER_PUBLICATION_READY def runtime_only_tensor_names(self, model): return ("runtime_only",) @@ -641,7 +645,7 @@ def cleanup_after_recipe_build( trace_tp_slices=True, trace_cache_schema_version=7, ), - is_reserved_serving_tensor_name=lambda name: name.startswith( + is_reserved_runtime_tensor_name=lambda name: name.startswith( "__tensorcast_meta__." 
), trace_capture_fn=lambda *_args: trace_plan, @@ -672,7 +676,7 @@ def test_recipe_build_session_owns_cached_recipe_context_match(): { "source_metadata_fingerprint": "meta-a", "topology_ref": None, - "member_ref": ServingBindingMemberRef( + "member_ref": RuntimeBindingMemberRef( member_id="member-1", member_index=1, member_count=2, @@ -683,7 +687,7 @@ def test_recipe_build_session_owns_cached_recipe_context_match(): "Placement", (), { - "member": ServingBindingMemberRef( + "member": RuntimeBindingMemberRef( member_id="member-1", member_index=1, member_count=2, @@ -714,7 +718,7 @@ def test_recipe_build_session_owns_cached_recipe_context_match(): "Placement", (), { - "member": ServingBindingMemberRef( + "member": RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=2, @@ -730,19 +734,19 @@ def test_recipe_cache_match_uses_serving_member_identity(): recipe = SimpleNamespace( source_metadata_fingerprint="meta-a", topology_ref=None, - member_ref=ServingBindingMemberRef( + member_ref=RuntimeBindingMemberRef( member_id="dp0:pp0:tp1", member_index=9, member_count=16, group_id="group-1", ), ) - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="topology-digest", logical_topology_ref="tensorcast://placement/topology", ), - member=ServingBindingMemberRef( + member=RuntimeBindingMemberRef( member_id="dp0:pp0:tp1", member_index=9, member_count=16, @@ -766,9 +770,9 @@ def test_recipe_cache_match_uses_serving_member_identity(): assert not session.cached_recipe_matches_context( recipe, source_catalog=source_catalog, - placement=ServingPlacement( + placement=RuntimePlacement( topology=placement.topology, - member=ServingBindingMemberRef( + member=RuntimeBindingMemberRef( member_id="dp0:pp0:tp2", member_index=10, member_count=16, diff --git a/tests/python/test_serving_builder_recipe_cache.py b/tests/python/artifact_runtime/recipe/test_cache.py similarity index 86% 
rename from tests/python/test_serving_builder_recipe_cache.py rename to tests/python/artifact_runtime/recipe/test_cache.py index 8fb15d6c..ad7db2dc 100644 --- a/tests/python/test_serving_builder_recipe_cache.py +++ b/tests/python/artifact_runtime/recipe/test_cache.py @@ -6,28 +6,28 @@ from tensorcast.api.store import BindingRealizationEntry from tensorcast.api.store import Range as StoreRange -from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, - SourceHullEntry, - TensorcastSemanticValidationSpec, - TensorcastServingFacts, - TensorSchemaEntry, -) -from tensorcast.serving.builder.recipe_cache import ( +from tensorcast.artifact_runtime.recipe.cache import ( RECIPE_CACHE_PAYLOAD_VERSION, load_compiled_recipe_cache, write_compiled_recipe_cache, ) -from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan +from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, + SourceHullEntry, + TensorcastRuntimeFacts, + TensorcastSemanticValidationSpec, + TensorSchemaEntry, +) +from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan from tensorcast.types import ( FinalizeClass, - ServingBindingMemberRef, - ServingSupportLevel, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeSupportLevel, + RuntimeTopologyRef, ) -def _recipe() -> CompiledServingRecipe: +def _recipe() -> CompiledRuntimeRecipe: trace_plan = TracePlan( copy_plan=[ CopyPlanEntry( @@ -43,16 +43,16 @@ def _recipe() -> CompiledServingRecipe: tensorcast_slices={"x": Range(dim=0, start=0, end=4)}, src_hull={"x": Range(dim=0, start=0, end=4)}, ) - return CompiledServingRecipe( + return CompiledRuntimeRecipe( compile_key="compile-key", source_artifact_ref="msa1:test-source", source_metadata_fingerprint="metadata-fingerprint", - serving_facts=TensorcastServingFacts( + runtime_facts=TensorcastRuntimeFacts( framework_name="vllm", framework_version="vllm-test", adapter_version="adapter-v1", 
serving_abi_version="abi-v1", - support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, runtime_only_tensor_names=("runtime",), process_after_load_class=FinalizeClass.RUNTIME_ONLY, post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY, @@ -77,11 +77,11 @@ def _recipe() -> CompiledServingRecipe: ), ), realization_fallback_plan=(), - topology_ref=ServingTopologyRef( + topology_ref=RuntimeTopologyRef( schema_topology_digest="topology-digest", logical_topology_ref="tensorcast://topology/topology-digest", ), - member_ref=ServingBindingMemberRef( + member_ref=RuntimeBindingMemberRef( member_id="dp0:pp0:tp0", member_index=0, member_count=2, @@ -119,7 +119,7 @@ def test_compiled_recipe_cache_round_trips(tmp_path: Path) -> None: assert payload["compiled_recipe"]["trace_plan_summary"]["expected_dst_names"] == [ "w" ] - assert payload["compiled_recipe"]["serving_facts"]["framework_version"] == ( + assert payload["compiled_recipe"]["runtime_facts"]["framework_version"] == ( "vllm-test" ) assert payload["compiled_recipe"]["topology_ref"]["schema_topology_digest"] == ( diff --git a/tests/python/test_serving_builder_compiler.py b/tests/python/artifact_runtime/recipe/test_compiler.py similarity index 79% rename from tests/python/test_serving_builder_compiler.py rename to tests/python/artifact_runtime/recipe/test_compiler.py index 77dd8da9..eecc1b45 100644 --- a/tests/python/test_serving_builder_compiler.py +++ b/tests/python/artifact_runtime/recipe/test_compiler.py @@ -7,23 +7,23 @@ import pytest import torch -from tensorcast.serving.builder.compiler import ( +from tensorcast.artifact_runtime.recipe.compiler import ( RecipeCompileInputs, - ServingBindingPlan, + RuntimeBindingPlan, + TensorcastRuntimeFacts, TensorcastSemanticValidationSpec, - TensorcastServingFacts, TensorSchemaEntry, - compile_serving_recipe, + compile_runtime_recipe, realization_plan_digest, target_tensor_schema_hash, ) -from 
tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan -from tensorcast.serving.source_catalog import SourceCatalog, SourceTensorMeta +from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan +from tensorcast.artifact_runtime.source import SourceCatalog, SourceTensorMeta from tensorcast.types import ( FinalizeClass, - ServingBindingMemberRef, - ServingSupportLevel, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeSupportLevel, + RuntimeTopologyRef, ) @@ -64,28 +64,28 @@ def _trace_plan() -> TracePlan: ) -def _serving_facts(adapter_version: str = "adapter-v1") -> TensorcastServingFacts: - return TensorcastServingFacts( +def _runtime_facts(adapter_version: str = "adapter-v1") -> TensorcastRuntimeFacts: + return TensorcastRuntimeFacts( framework_name="vllm", framework_version="vllm-test", adapter_version=adapter_version, serving_abi_version="abi-v1", - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, runtime_only_tensor_names=("runtime_only",), process_after_load_class=FinalizeClass.RUNTIME_ONLY, post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY, ) -def _topology_ref(digest: str = "topology-digest") -> ServingTopologyRef: - return ServingTopologyRef( +def _topology_ref(digest: str = "topology-digest") -> RuntimeTopologyRef: + return RuntimeTopologyRef( schema_topology_digest=digest, logical_topology_ref=f"tensorcast://topology/{digest}", ) -def _member_ref(index: int = 0, count: int = 1) -> ServingBindingMemberRef: - return ServingBindingMemberRef( +def _member_ref(index: int = 0, count: int = 1) -> RuntimeBindingMemberRef: + return RuntimeBindingMemberRef( member_id=f"dp0:pp0:tp{index}", member_index=index, member_count=count, @@ -93,8 +93,8 @@ def _member_ref(index: int = 0, count: int = 1) -> ServingBindingMemberRef: ) -def _identity() -> ServingBindingPlan: - return ServingBindingPlan( +def _identity() -> RuntimeBindingPlan: + 
return RuntimeBindingPlan( model_id="fake-model", model_revision=None, dtype="torch.float16", @@ -113,7 +113,7 @@ def _inputs(**overrides) -> RecipeCompileInputs: values = { "source_catalog": _source_catalog(), "trace_plan": _trace_plan(), - "serving_facts": _serving_facts(), + "runtime_facts": _runtime_facts(), "tensor_schema": ( TensorSchemaEntry( name="w", @@ -142,10 +142,10 @@ def event(self, name: str, payload) -> None: self.events.append((name, dict(payload))) -def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None: +def test_compile_runtime_recipe_assembles_recipe_from_pure_inputs() -> None: observer = _Observer() - recipe = compile_serving_recipe( + recipe = compile_runtime_recipe( identity=_identity(), inputs=_inputs(), observer=observer, @@ -154,7 +154,7 @@ def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None: assert recipe.compile_key assert recipe.source_artifact_ref == "mi2:test:source" assert recipe.source_metadata_fingerprint == "metadata-fingerprint" - assert recipe.serving_facts.framework_name == "vllm" + assert recipe.runtime_facts.framework_name == "vllm" assert recipe.trace_plan.expected_src_names == {"x"} assert [entry.name for entry in recipe.tensor_schema] == ["w"] assert [entry.name for entry in recipe.source_hull] == ["x"] @@ -173,7 +173,7 @@ def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None: assert recipe.binding_plan.realization_plan_digest == realization_plan_digest( recipe.realization_plan_proto ) - assert recipe.binding_plan.serving_facts is recipe.serving_facts + assert recipe.binding_plan.runtime_facts is recipe.runtime_facts assert recipe.binding_plan.trace_plan is recipe.trace_plan assert recipe.binding_plan.tensor_schema == recipe.tensor_schema assert recipe.binding_plan.source_hull == recipe.source_hull @@ -210,13 +210,13 @@ def test_compile_serving_recipe_assembles_recipe_from_pure_inputs() -> None: ] -def 
test_compile_serving_recipe_compile_key_invalidates_on_pure_inputs() -> None: - recipe_a = compile_serving_recipe(identity=_identity(), inputs=_inputs()) - recipe_b = compile_serving_recipe( +def test_compile_runtime_recipe_compile_key_invalidates_on_pure_inputs() -> None: + recipe_a = compile_runtime_recipe(identity=_identity(), inputs=_inputs()) + recipe_b = compile_runtime_recipe( identity=replace(_identity(), adapter_version="adapter-v2"), - inputs=_inputs(serving_facts=_serving_facts("adapter-v2")), + inputs=_inputs(runtime_facts=_runtime_facts("adapter-v2")), ) - recipe_c = compile_serving_recipe( + recipe_c = compile_runtime_recipe( identity=replace( _identity(), topology_ref=_topology_ref("topology-digest-b"), @@ -229,25 +229,25 @@ def test_compile_serving_recipe_compile_key_invalidates_on_pure_inputs() -> None assert recipe_a.compile_key != recipe_c.compile_key -def test_compile_serving_recipe_rejects_identity_fact_mismatch() -> None: - with pytest.raises(ValueError, match="ServingBindingPlan must match"): - compile_serving_recipe( +def test_compile_runtime_recipe_rejects_identity_fact_mismatch() -> None: + with pytest.raises(ValueError, match="RuntimeBindingPlan must match"): + compile_runtime_recipe( identity=replace(_identity(), adapter_version="adapter-v2"), inputs=_inputs(), ) -def test_compile_serving_recipe_rejects_missing_destination_schema() -> None: +def test_compile_runtime_recipe_rejects_missing_destination_schema() -> None: with pytest.raises(ValueError, match="tensor_schema is missing"): - compile_serving_recipe( + compile_runtime_recipe( identity=_identity(), inputs=_inputs(tensor_schema=()), ) -def test_compile_serving_recipe_rejects_synthetic_source_identity() -> None: +def test_compile_runtime_recipe_rejects_synthetic_source_identity() -> None: with pytest.raises(ValueError, match="real imported source artifact"): - compile_serving_recipe( + compile_runtime_recipe( identity=_identity(), 
inputs=_inputs(source_catalog=_source_catalog("disk:/tmp/fake")), ) diff --git a/tests/python/test_serving_builder_materialization.py b/tests/python/artifact_runtime/recipe/test_materialization.py similarity index 95% rename from tests/python/test_serving_builder_materialization.py rename to tests/python/artifact_runtime/recipe/test_materialization.py index 74cea99d..d2013998 100644 --- a/tests/python/test_serving_builder_materialization.py +++ b/tests/python/artifact_runtime/recipe/test_materialization.py @@ -3,11 +3,11 @@ import pytest import torch -from tensorcast.serving.builder.materialization import ( +from tensorcast.artifact_runtime.recipe.materialization import ( apply_copy_plan, validate_dst_coverage, ) -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, diff --git a/tests/python/test_serving_builder_publication.py b/tests/python/artifact_runtime/recipe/test_publication.py similarity index 93% rename from tests/python/test_serving_builder_publication.py rename to tests/python/artifact_runtime/recipe/test_publication.py index f082ff34..cd8f3852 100644 --- a/tests/python/test_serving_builder_publication.py +++ b/tests/python/artifact_runtime/recipe/test_publication.py @@ -2,10 +2,12 @@ import pytest -from tensorcast.serving.builder.publication import ( +from tensorcast.artifact_runtime.publication.context import ( RecipePublicationContext, - build_binding_finalize_publication_bundle_from_context, build_pure_transform_build_intent, +) +from tensorcast.artifact_runtime.recipe.publication import ( + build_binding_finalize_publication_bundle_from_context, build_pure_transform_publication_spec_from_context, ) from tensorcast.types import BindingValueRef, BuilderMode @@ -46,8 +48,7 @@ def test_publication_context_passes_framework_topology_json() -> None: assert spec.logical_topology_json == '{"topology":"framework-owned"}' -def 
test_binding_finalize_publication_requires_explicit_admission_facts( -) -> None: +def test_binding_finalize_publication_requires_explicit_admission_facts() -> None: with pytest.raises(ValueError, match="explicit admission_facts"): build_binding_finalize_publication_bundle_from_context( _context(), diff --git a/tests/python/test_serving_builder_tensor_parity.py b/tests/python/artifact_runtime/recipe/test_tensor_parity.py similarity index 98% rename from tests/python/test_serving_builder_tensor_parity.py rename to tests/python/artifact_runtime/recipe/test_tensor_parity.py index 5fa321bb..3ebf50a7 100644 --- a/tests/python/test_serving_builder_tensor_parity.py +++ b/tests/python/artifact_runtime/recipe/test_tensor_parity.py @@ -6,8 +6,7 @@ from tensorcast.api.store import BindingRealizationEntry from tensorcast.api.store import Range as StoreRange -from tensorcast.proto.daemon.v2 import store_daemon_pb2 -from tensorcast.serving.builder.tensor_parity import ( +from tensorcast.artifact_runtime.recipe.tensor_parity import ( build_tensor_parity_probes_from_realization_plan, build_tensor_parity_probes_from_realization_plan_proto, build_tensor_parity_probes_from_recipe, @@ -15,12 +14,13 @@ evaluate_recipe_tensor_parity, evaluate_tensor_parity_probes, ) -from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, TracePlan, ) +from tensorcast.proto.daemon.v2 import store_daemon_pb2 def _trace_plan() -> TracePlan: diff --git a/tests/python/test_serving_builder_trace_cache.py b/tests/python/artifact_runtime/recipe/test_trace_cache.py similarity index 90% rename from tests/python/test_serving_builder_trace_cache.py rename to tests/python/artifact_runtime/recipe/test_trace_cache.py index 4e2d034b..44075777 100644 --- a/tests/python/test_serving_builder_trace_cache.py +++ b/tests/python/artifact_runtime/recipe/test_trace_cache.py @@ -4,13 +4,13 @@ import json -from 
tensorcast.serving.builder.trace_cache import ( +from tensorcast.artifact_runtime.recipe.trace_cache import ( dump_trace_plan_debug, load_trace_plan_cache, trace_plan_debug_payload, write_trace_plan_cache, ) -from tensorcast.serving.builder.trace_ir import CopyPlanEntry, Range, TracePlan +from tensorcast.artifact_runtime.recipe.trace_ir import CopyPlanEntry, Range, TracePlan def _trace_plan() -> TracePlan: @@ -42,7 +42,7 @@ def test_trace_plan_cache_round_trips_versioned_payload(tmp_path) -> None: def test_trace_plan_cache_rejects_unversioned_raw_payload(tmp_path) -> None: - from tensorcast.serving.builder.trace_ir import trace_plan_to_dict + from tensorcast.artifact_runtime.recipe.trace_ir import trace_plan_to_dict path = tmp_path / "unversioned.json" trace_plan = _trace_plan() diff --git a/tests/python/test_serving_builder_trace_ir.py b/tests/python/artifact_runtime/recipe/test_trace_ir.py similarity index 95% rename from tests/python/test_serving_builder_trace_ir.py rename to tests/python/artifact_runtime/recipe/test_trace_ir.py index 5f03898c..2d8a474a 100644 --- a/tests/python/test_serving_builder_trace_ir.py +++ b/tests/python/artifact_runtime/recipe/test_trace_ir.py @@ -1,6 +1,6 @@ # Copyright (c) 2026, TensorCast Team. 
-from tensorcast.serving.builder.trace_ir import ( +from tensorcast.artifact_runtime.recipe.trace_ir import ( CopyPlanEntry, MultiRange, Range, diff --git a/tests/python/test_serving_builder_validation.py b/tests/python/artifact_runtime/recipe/test_validation.py similarity index 70% rename from tests/python/test_serving_builder_validation.py rename to tests/python/artifact_runtime/recipe/test_validation.py index 2eed9055..a7c64ddf 100644 --- a/tests/python/test_serving_builder_validation.py +++ b/tests/python/artifact_runtime/recipe/test_validation.py @@ -7,35 +7,35 @@ import pytest import torch -from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, +from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, + TensorcastRuntimeFacts, TensorcastSemanticValidationSpec, - TensorcastServingFacts, TensorSchemaEntry, ) -from tensorcast.serving.builder.recipe_validation import ( - validate_recipe_for_builder_mode, -) -from tensorcast.serving.builder.semantic_validation import ( +from tensorcast.artifact_runtime.recipe.semantic_validation import ( evaluate_semantic_validation_spec, ) -from tensorcast.serving.builder.tensor_schema import ( +from tensorcast.artifact_runtime.recipe.tensor_schema import ( validate_tensor_schema_against_tensors, ) -from tensorcast.serving.builder.trace_ir import TracePlan -from tensorcast.types import BuilderMode, FinalizeClass, ServingSupportLevel +from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan +from tensorcast.artifact_runtime.recipe.validation import ( + validate_recipe_for_builder_mode, +) +from tensorcast.types import BuilderMode, FinalizeClass, RuntimeSupportLevel -def _recipe() -> CompiledServingRecipe: - return CompiledServingRecipe( +def _recipe() -> CompiledRuntimeRecipe: + return CompiledRuntimeRecipe( compile_key="compile-key", source_artifact_ref="mi2:test:source", source_metadata_fingerprint="metadata-fingerprint", - serving_facts=TensorcastServingFacts( + 
runtime_facts=TensorcastRuntimeFacts( framework_name="vllm", adapter_version="adapter-v1", serving_abi_version="abi-v1", - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, runtime_only_tensor_names=("runtime_only",), process_after_load_class=FinalizeClass.RUNTIME_ONLY, post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY, @@ -79,9 +79,9 @@ def test_validate_recipe_for_builder_mode_rejects_binding_finalize_fact_mismatch def test_validate_recipe_for_builder_mode_rejects_non_publication_ready() -> None: recipe = replace( _recipe(), - serving_facts=replace( - _recipe().serving_facts, - support_level=ServingSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY, + runtime_facts=replace( + _recipe().runtime_facts, + support_level=RuntimeSupportLevel.SOURCE_BIND_BOOTSTRAP_ONLY, ), ) @@ -116,6 +116,28 @@ def test_evaluate_semantic_validation_spec_rejects_explicit_mismatch() -> None: evaluate_semantic_validation_spec(spec, {"values": [3, 2, 1]}) +def test_evaluate_semantic_validation_spec_compares_framework_probes() -> None: + spec = TensorcastSemanticValidationSpec( + kind="framework_semantic_probes", + payload={"values": [1, 2, 3]}, + ) + + assert evaluate_semantic_validation_spec( + spec, + _ProbePayload(values=(1, 2, 3)), + ) == {"values": [1, 2, 3]} + + +def test_evaluate_semantic_validation_spec_rejects_framework_probe_mismatch() -> None: + spec = TensorcastSemanticValidationSpec( + kind="framework_semantic_probes", + payload={"values": [1, 2, 3]}, + ) + + with pytest.raises(RuntimeError, match="semantic validation failed"): + evaluate_semantic_validation_spec(spec, {"values": [1, 2]}) + + def test_validate_tensor_schema_against_tensors_checks_names_shape_stride_dtype() -> ( None ): diff --git a/tests/python/test_serving_config.py b/tests/python/artifact_runtime/test_config.py similarity index 74% rename from tests/python/test_serving_config.py rename to tests/python/artifact_runtime/test_config.py index 
156455d9..7fed30a9 100644 --- a/tests/python/test_serving_config.py +++ b/tests/python/artifact_runtime/test_config.py @@ -2,50 +2,48 @@ from __future__ import annotations +import importlib.util + import pytest -from tensorcast.serving import ( - ArtifactBindStartPlan, +import tensorcast.artifact_runtime.dto as serving_dto +from tensorcast.artifact_runtime.config import ( RetainedBindingAcquireSettings, - RetainedBindingAcquireStartPlan, - ServingArtifactLocator, - ServingConfig, - ServingPolicy, - ServingStartPlanError, - SourceBootstrapToBindingStartPlan, - merge_serving_reload_extra_config, - plan_serving_start, + RuntimeArtifactBindStartPlan, + RuntimeRetainedRealizationStartPlan, + RuntimeSourceBootstrapStartPlan, + RuntimeStartPlanError, + TensorCastRuntimeConfig, + plan_runtime_start, +) +from tensorcast.artifact_runtime.locator import ( + ArtifactLocator, ranked_version_key_for_member, ) -from tensorcast.serving import dto as serving_dto -from tensorcast.serving.retained_binding import parse_retained_serving_binding_authority -from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef +from tensorcast.artifact_runtime.policy import ( + RuntimePolicy, + merge_runtime_reload_extra_config, +) +from tensorcast.retained_realization import parse_retained_realization_authority +from tensorcast.types import RuntimeBindingMemberRef, RuntimeTopologyRef FrameworkIntegrationContext = serving_dto.FrameworkIntegrationContext -PreparedServingArtifact = serving_dto.PreparedServingArtifact +PreparedRuntimeArtifact = serving_dto.PreparedRuntimeArtifact RuntimeTensorView = serving_dto.RuntimeTensorView -ServingBindingValue = serving_dto.ServingBindingValue -ServingPlacement = serving_dto.ServingPlacement - - -def test_serving_root_does_not_reexport_lifecycle_dtos() -> None: - import tensorcast.serving as serving - - for name in ( - "BootstrapEndpointProjection", - "BootstrapSummary", - "BindingValueRefProjection", - "FrameworkIntegrationContext", - 
"MaterializationDiagnosticsProjection", - "PreparedServingArtifact", - "ReloadRequestProjection", - "RuntimeTensorView", - "ServingBindingValue", - "ServingPlacement", - "SourceBoundContractProjection", - ): - with pytest.raises(AttributeError): - getattr(serving, name) +RuntimeBindingValue = serving_dto.RuntimeBindingValue +RuntimePlacement = serving_dto.RuntimePlacement + + +def _find_spec_or_none(module_name: str): + try: + return importlib.util.find_spec(module_name) + except ModuleNotFoundError: + return None + + +def test_serving_public_package_is_removed() -> None: + assert _find_spec_or_none("tensorcast.serving") is None + assert _find_spec_or_none("tensorcast.serving.runtime") is None def _retained_binding_acquire_config() -> dict: @@ -81,13 +79,13 @@ def _retained_binding_acquire_config() -> dict: "reservation_bytes": 4096, "scope_digest": "scope-1", }, - "readiness": "serving_published_ready", + "readiness": "runtime_published_ready", "serving_artifact_id": "mi2:test:serving", "trusted_reservation_bytes": 4096, "expected": { "target_layout_hash": "layout-hash", "tensor_schema_hash": "schema-hash", - "serving_build_digest": "build-digest", + "runtime_build_digest": "build-digest", "resolved_spec_digest": "spec-digest", }, }, @@ -95,8 +93,8 @@ def _retained_binding_acquire_config() -> dict: } -def test_serving_config_parses_nested_schema_defaults() -> None: - config = ServingConfig.from_mapping( +def test_runtime_config_parses_nested_schema_defaults() -> None: + config = TensorCastRuntimeConfig.from_mapping( { "runtime": { "mode": "CONNECT", @@ -107,7 +105,7 @@ def test_serving_config_parses_nested_schema_defaults() -> None: "address": "127.0.0.1:50051", }, }, - "serving": { + "runtime_artifact": { "artifact_locator": { "kind": "version_key", "value": " models/demo/serving/v1 ", @@ -126,11 +124,11 @@ def test_serving_config_parses_nested_schema_defaults() -> None: assert config.runtime.mode == "connect" assert config.runtime.daemon.show_logs is True assert 
config.runtime.global_store.resolved_mode("connect") == "connect" - assert config.serving.artifact_locator == ServingArtifactLocator( + assert config.runtime_artifact.artifact_locator == ArtifactLocator( kind="version_key", value="models/demo/serving/v1", ) - assert config.to_mapping()["serving"]["artifact_locator"] == { + assert config.to_mapping()["runtime_artifact"]["artifact_locator"] == { "kind": "version_key", "value": "models/demo/serving/v1", "schema_version": 1, @@ -140,11 +138,25 @@ def test_serving_config_parses_nested_schema_defaults() -> None: assert config.materialization.collective_policy_value() == "require_collective" -def test_serving_config_rejects_selector_alias() -> None: - with pytest.raises(ValueError, match="artifact_locator"): - ServingConfig.from_mapping( +def test_runtime_config_rejects_removed_serving_section() -> None: + with pytest.raises(ValueError, match="serving.*removed"): + TensorCastRuntimeConfig.from_mapping( { "serving": { + "artifact_locator": { + "kind": "artifact_ref", + "value": "mi2:test:serving", + }, + }, + } + ) + + +def test_runtime_config_rejects_runtime_artifact_selector_alias() -> None: + with pytest.raises(ValueError, match="runtime_artifact.artifact_locator"): + TensorCastRuntimeConfig.from_mapping( + { + "runtime_artifact": { "selector": { "kind": "artifact_ref", "value": "mi2:test:serving", @@ -154,8 +166,8 @@ def test_serving_config_rejects_selector_alias() -> None: ) -def test_serving_config_emits_retained_binding_acquire_canonical_field() -> None: - config = ServingConfig.from_mapping(_retained_binding_acquire_config()) +def test_runtime_config_emits_retained_binding_acquire_canonical_field() -> None: + config = TensorCastRuntimeConfig.from_mapping(_retained_binding_acquire_config()) assert isinstance(config.retained_binding_acquire, RetainedBindingAcquireSettings) mapping = config.to_mapping() @@ -163,9 +175,9 @@ def test_serving_config_emits_retained_binding_acquire_canonical_field() -> None assert 
mapping["retained_binding_acquire"]["mode"] == "external" -def test_serving_config_rejects_preload_key() -> None: - with pytest.raises(ValueError, match="Unexpected TensorCast serving config"): - ServingConfig.from_mapping( +def test_runtime_config_rejects_preload_key() -> None: + with pytest.raises(ValueError, match="Unexpected TensorCast runtime config"): + TensorCastRuntimeConfig.from_mapping( { "preload": { "mode": "external", @@ -175,13 +187,13 @@ def test_serving_config_rejects_preload_key() -> None: def test_ranked_version_key_locator_scopes_by_serving_member(monkeypatch) -> None: - member = ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id="dp0:pp0:tp1", member_index=1, member_count=2, group_id="group-1", ) - locator = ServingArtifactLocator.ranked_version_key("models/demo/serving/v1/") + locator = ArtifactLocator.ranked_version_key("models/demo/serving/v1/") assert ( ranked_version_key_for_member( @@ -193,8 +205,8 @@ def test_ranked_version_key_locator_scopes_by_serving_member(monkeypatch) -> Non assert locator.resolve_version_key(member=member) == ( "models/demo/serving/v1/members/dp0:pp0:tp1" ) - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="topology-digest", logical_topology_ref="fake://topology", ), @@ -218,18 +230,18 @@ def resolve_key_mapping_cached(self, *, key): assert locator.resolve_artifact_ref(member=member) == "mi2:test:serving-rank-1" -def test_serving_artifact_locator_is_canonical_name() -> None: - locator = ServingArtifactLocator.ranked_version_key("models/demo/serving/v1") +def test_artifact_locator_is_runtime_canonical_name() -> None: + locator = ArtifactLocator.ranked_version_key("models/demo/serving/v1") - assert isinstance(locator, ServingArtifactLocator) + assert isinstance(locator, ArtifactLocator) assert locator.kind == "ranked_version_key" assert locator.value == "models/demo/serving/v1" -def 
test_plan_serving_start_classifies_three_canonical_variants() -> None: - artifact_config = ServingConfig.from_mapping( +def test_plan_runtime_start_classifies_three_canonical_variants() -> None: + artifact_config = TensorCastRuntimeConfig.from_mapping( { - "serving": { + "runtime_artifact": { "artifact_locator": { "kind": "artifact_ref", "value": "mi2:test:serving", @@ -237,57 +249,57 @@ def test_plan_serving_start_classifies_three_canonical_variants() -> None: }, } ) - artifact_plan = plan_serving_start( + artifact_plan = plan_runtime_start( config=artifact_config, source_selector=object(), ) - assert isinstance(artifact_plan, ArtifactBindStartPlan) + assert isinstance(artifact_plan, RuntimeArtifactBindStartPlan) assert artifact_plan.kind == "artifact_bind" assert artifact_plan.artifact_locator.value == "mi2:test:serving" source_selector = object() - source_plan = plan_serving_start( - config=ServingConfig.from_mapping({}), + source_plan = plan_runtime_start( + config=TensorCastRuntimeConfig.from_mapping({}), source_selector=source_selector, ) - assert isinstance(source_plan, SourceBootstrapToBindingStartPlan) + assert isinstance(source_plan, RuntimeSourceBootstrapStartPlan) assert source_plan.kind == "source_bootstrap_to_binding" assert source_plan.source_selector is source_selector - retained_plan = plan_serving_start( - config=ServingConfig.from_mapping(_retained_binding_acquire_config()), + retained_plan = plan_runtime_start( + config=TensorCastRuntimeConfig.from_mapping(_retained_binding_acquire_config()), source_selector=source_selector, ) - assert isinstance(retained_plan, RetainedBindingAcquireStartPlan) + assert isinstance(retained_plan, RuntimeRetainedRealizationStartPlan) assert retained_plan.kind == "retained_binding_acquire" assert retained_plan.authority.binding_value_ref.binding_id == "binding-1" -def test_plan_serving_start_reports_no_selected_candidate() -> None: - with pytest.raises(ServingStartPlanError, match="rejected candidates"): - 
plan_serving_start( - config=ServingConfig.from_mapping({}), +def test_plan_runtime_start_reports_no_selected_candidate() -> None: + with pytest.raises(RuntimeStartPlanError, match="rejected candidates"): + plan_runtime_start( + config=TensorCastRuntimeConfig.from_mapping({}), source_selector=None, ) def test_ranked_version_key_locator_requires_member() -> None: - locator = ServingArtifactLocator.ranked_version_key("models/demo/serving/v1") + locator = ArtifactLocator.ranked_version_key("models/demo/serving/v1") - with pytest.raises(ValueError, match="requires a serving member"): + with pytest.raises(ValueError, match="requires a member"): locator.resolve_version_key() -def test_serving_config_rejects_unknown_top_level_keys() -> None: - with pytest.raises(ValueError, match="Unexpected TensorCast serving config"): - ServingConfig.from_mapping({"unrelated": "unexpected"}) +def test_runtime_config_rejects_unknown_top_level_keys() -> None: + with pytest.raises(ValueError, match="Unexpected TensorCast runtime config"): + TensorCastRuntimeConfig.from_mapping({"unrelated": "unexpected"}) -def test_serving_policy_pinned_requires_identity_fields() -> None: +def test_runtime_policy_pinned_requires_identity_fields() -> None: with pytest.raises(ValueError, match="manifest_ref"): - ServingPolicy(mode="pinned") + RuntimePolicy(mode="pinned") - policy = ServingPolicy( + policy = RuntimePolicy( mode="pinned", manifest_ref="tensor:__tensorcast_meta__.manifest_json", representation_contract_hash="repr-hash", @@ -297,19 +309,19 @@ def test_serving_policy_pinned_requires_identity_fields() -> None: assert policy.manifest_ref == "tensor:__tensorcast_meta__.manifest_json" -def test_merge_serving_reload_extra_config_normalizes_wire_shape() -> None: +def test_merge_runtime_reload_extra_config_normalizes_wire_shape() -> None: extra = { "runtime": { "mode": "connect", }, - "serving": { + "runtime_artifact": { "policy": { "mode": "from_manifest", }, }, } - merged = 
merge_serving_reload_extra_config( + merged = merge_runtime_reload_extra_config( extra, artifact_locator={ "kind": "artifact_ref", @@ -324,21 +336,21 @@ def test_merge_serving_reload_extra_config_normalizes_wire_shape() -> None: ) assert merged["runtime"] == {"mode": "connect"} - assert merged["serving"]["artifact_locator"] == { + assert merged["runtime_artifact"]["artifact_locator"] == { "kind": "artifact_ref", "value": "mi2:test:serving", } - assert merged["serving"]["policy"] == { + assert merged["runtime_artifact"]["policy"] == { "mode": "pinned", "manifest_ref": "tensor:manifest", "representation_contract_hash": "repr-hash", "serving_build_digest": "build-digest", } - assert extra["serving"]["policy"] == {"mode": "from_manifest"} + assert extra["runtime_artifact"]["policy"] == {"mode": "from_manifest"} -def test_serving_config_parses_retained_binding_authority() -> None: - config = ServingConfig.from_mapping( +def test_runtime_config_parses_retained_binding_authority() -> None: + config = TensorCastRuntimeConfig.from_mapping( { "retained_binding_acquire": { "mode": "external", @@ -362,13 +374,13 @@ def test_serving_config_parses_retained_binding_authority() -> None: "reservation_capability": { "capability_id": "capability-1", }, - "readiness": "serving_published_ready", + "readiness": "runtime_published_ready", "serving_artifact_id": "mi2:test:serving", "trusted_reservation_bytes": 4096, "expected": { "target_layout_hash": "layout-hash", "tensor_schema_hash": "schema-hash", - "serving_build_digest": "build-digest", + "runtime_build_digest": "build-digest", "resolved_spec_digest": "spec-digest", }, }, @@ -419,21 +431,21 @@ def test_retained_binding_authority_parses_typed_refs() -> None: "scope_digest": "scope-1", }, "local_serving_ref": "binding-local:binding-1:value-1", - "readiness": "serving_published_ready", + "readiness": "runtime_published_ready", "verification_state": "local_only", "serving_artifact_id": "mi2:test:serving", "trusted_reservation_bytes": 
4096, "expected": { "target_layout_hash": "layout-hash", "tensor_schema_hash": "schema-hash", - "serving_build_digest": "build-digest", + "runtime_build_digest": "build-digest", "resolved_spec_digest": "spec-digest", }, }, }, } - authority = parse_retained_serving_binding_authority(config) + authority = parse_retained_realization_authority(config) assert authority.binding_value_ref.binding_id == "binding-1" assert authority.reservation_capability.reservation_bytes == 4096 @@ -448,14 +460,14 @@ def test_prepared_serving_artifact_serializes_without_bootstrap_projection() -> "binding_value_id": "value-1", "seal_generation": 1, } - prepared = PreparedServingArtifact( + prepared = PreparedRuntimeArtifact( source_artifact_ref="disk:/model", serving_artifact_ref=None, serving_manifest_ref="tensor:manifest", representation_contract_hash="repr-hash", serving_build_digest="build-digest", binding_value_ref=binding_value_ref, - readiness="serving_local_ready", + readiness="runtime_local_ready", family="dummy", tensor_schema_hash="schema-hash", binding_layout_id="layout-1", @@ -466,20 +478,20 @@ def test_prepared_serving_artifact_serializes_without_bootstrap_projection() -> assert payload["serving_manifest_ref"] == "tensor:manifest" assert payload["serving_artifact_ref"] is None - assert payload["readiness"] == "serving_local_ready" + assert payload["readiness"] == "runtime_local_ready" assert payload["binding_value_ref"] == binding_value_ref assert "bootstrap_summary" not in payload binding_value = prepared.to_binding_value() - assert isinstance(binding_value, ServingBindingValue) + assert isinstance(binding_value, RuntimeBindingValue) assert binding_value.source_artifact_ref == "disk:/model" - assert binding_value.readiness == "serving_local_ready" + assert binding_value.readiness == "runtime_local_ready" assert binding_value.tensor_schema_hash == "schema-hash" assert binding_value.to_dict()["binding_value_ref"] == binding_value_ref def 
test_prepared_serving_artifact_builds_reload_request() -> None: - artifact = PreparedServingArtifact( + artifact = PreparedRuntimeArtifact( source_artifact_ref="disk:/model", serving_artifact_ref="mi2:test:serving", manifest_ref="tensor:manifest", @@ -505,7 +517,7 @@ def test_prepared_serving_artifact_builds_reload_request() -> None: def test_local_ready_prepared_serving_artifact_cannot_build_reload_request() -> None: - artifact = PreparedServingArtifact( + artifact = PreparedRuntimeArtifact( source_artifact_ref="disk:/model", serving_artifact_ref=None, manifest_ref="tensor:manifest", @@ -517,7 +529,7 @@ def test_local_ready_prepared_serving_artifact_cannot_build_reload_request() -> "binding_value_id": "value-1", "seal_generation": 1, }, - readiness="serving_local_ready", + readiness="runtime_local_ready", local_serving_ref="binding-local:binding-1:value-1", family="demo", tensor_schema_hash="schema-hash", @@ -529,17 +541,17 @@ def test_local_ready_prepared_serving_artifact_cannot_build_reload_request() -> payload = artifact.to_dict() assert payload["serving_artifact_ref"] is None assert payload["binding_value_ref"]["binding_value_id"] == "value-1" - assert payload["readiness"] == "serving_local_ready" + assert payload["readiness"] == "runtime_local_ready" assert payload["reload_request"] is None def test_framework_context_and_runtime_tensor_view_are_identity_only() -> None: - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="topology-digest", logical_topology_ref="vllm://parallelism?tp=2&pp=1&dp=1", ), - member=ServingBindingMemberRef( + member=RuntimeBindingMemberRef( member_id="dp0:pp0:tp1", member_index=1, member_count=2, diff --git a/tests/python/test_serving_contract.py b/tests/python/artifact_runtime/test_contract.py similarity index 80% rename from tests/python/test_serving_contract.py rename to tests/python/artifact_runtime/test_contract.py index 
b61cdeeb..ba5f5904 100644 --- a/tests/python/test_serving_contract.py +++ b/tests/python/artifact_runtime/test_contract.py @@ -5,18 +5,18 @@ import pytest import torch -from tensorcast.api.store.serving_builder import compute_serving_tensor_schema_hash from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry -from tensorcast.serving.contract import ( +from tensorcast.artifact_runtime.contract import ( collect_runtime_tensor_schema, + compute_canonical_runtime_tensor_schema_hash, compute_runtime_representation_contract_hash, compute_runtime_tensor_schema_hash, logical_topology_json, ) -from tensorcast.types import ServingBindingMemberRef, ServingTopologyRef +from tensorcast.types import RuntimeBindingMemberRef, RuntimeTopologyRef -def test_runtime_tensor_schema_hash_matches_serving_builder_contract() -> None: +def test_runtime_tensor_schema_hash_matches_canonical_runtime_contract() -> None: tensor = torch.empty((2, 3), dtype=torch.float16) schema = collect_runtime_tensor_schema( {"weights": tensor}, @@ -39,18 +39,19 @@ def test_runtime_tensor_schema_hash_matches_serving_builder_contract() -> None: ) assert compute_runtime_tensor_schema_hash( - schema) == compute_serving_tensor_schema_hash(canonical_index) + schema + ) == compute_canonical_runtime_tensor_schema_hash(canonical_index) def test_runtime_tensor_schema_requires_zero_storage_offset() -> None: - view = torch.empty((4, ), dtype=torch.float32)[1:] + view = torch.empty((4,), dtype=torch.float32)[1:] with pytest.raises(ValueError, match="storage_offset == 0"): collect_runtime_tensor_schema({"view": view}, remove_duplicate=False) def test_runtime_tensor_schema_duplicate_filter_is_explicit() -> None: - tensor = torch.empty((2, ), dtype=torch.float32) + tensor = torch.empty((2,), dtype=torch.float32) full = collect_runtime_tensor_schema( { @@ -68,17 +69,15 @@ def test_runtime_tensor_schema_duplicate_filter_is_explicit() -> None: ) assert tuple(entry.name for entry in full) == ("a", "b") - assert 
tuple(entry.name for entry in deduped) == ("a", ) + assert tuple(entry.name for entry in deduped) == ("a",) def test_logical_topology_json_is_canonicalized_by_core() -> None: - topology = ServingTopologyRef(schema_topology_digest="topology-digest") + topology = RuntimeTopologyRef(schema_topology_digest="topology-digest") payload_a = { - "family": - "vllm_tensor_parallel", - "version": - "v1", + "family": "vllm_tensor_parallel", + "version": "v1", "dimensions": [ { "name": "pipeline_parallel", @@ -101,22 +100,21 @@ def test_logical_topology_json_is_canonicalized_by_core() -> None: "name": "pipeline_parallel", }, ], - "version": - "v1", - "family": - "vllm_tensor_parallel", + "version": "v1", + "family": "vllm_tensor_parallel", } - assert logical_topology_json(topology, framework_payload=payload_a) == \ - logical_topology_json(topology, framework_payload=payload_b) + assert logical_topology_json( + topology, framework_payload=payload_a + ) == logical_topology_json(topology, framework_payload=payload_b) def test_runtime_representation_contract_hash_is_versioned_and_stable() -> None: - topology = ServingTopologyRef( + topology = RuntimeTopologyRef( schema_topology_digest="topology-digest", logical_topology_ref="vllm://parallelism?tp=2", ) - member = ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id="tp1", member_index=1, member_count=2, diff --git a/tests/python/artifact_runtime/test_fake_framework_boundary.py b/tests/python/artifact_runtime/test_fake_framework_boundary.py new file mode 100644 index 00000000..fb8c2b61 --- /dev/null +++ b/tests/python/artifact_runtime/test_fake_framework_boundary.py @@ -0,0 +1,1407 @@ +# Copyright (c) 2026, TensorCast Team. 
+ +import weakref +from contextlib import contextmanager +from types import SimpleNamespace + +import pytest +import torch + +import tensorcast as tc +import tensorcast.artifact_runtime.lifecycle as integration_mod +import tensorcast.artifact_runtime.recipe.local_ready as local_ready_mod +from tensorcast.api.store.artifact import Artifact +from tensorcast.artifact_runtime.admin import AdminLocalSourceBootstrap +from tensorcast.artifact_runtime.host import ( + FrameworkIdentity, + IntegrationHost, + MaterializationExecutionFacts, + PlacementAdmissionFacts, + PlacementIdentityFacts, + PlacementMemberFacts, + SourceSelector, +) +from tensorcast.artifact_runtime.intent import ( + BootstrapPolicy, + ExistingRuntimeArtifact, + RequestContext, + RetainedBindingAcquire, +) +from tensorcast.artifact_runtime.lifecycle import ArtifactRuntimeIntegration +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.recipe.build import ( + RecipeBuildSession, + RuntimeBindingPlan, +) +from tensorcast.artifact_runtime.recipe.compiler import ( + CompiledRuntimeRecipe, + TensorcastRuntimeFacts, + TensorcastSemanticValidationSpec, + TensorSchemaEntry, +) +from tensorcast.artifact_runtime.recipe.trace_ir import TracePlan +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, + RetainedRealizationExpectedDigests, +) +from tensorcast.types import ( + BindingReservationCapability, + BindingValueRef, + FinalizeClass, + RuntimeArtifactManifest, + RuntimeBindingMemberRef, + RuntimeSupportLevel, +) + + +class _FakeArtifactView: + def __init__(self, parent, names=None): + self.parent = parent + self.names = tuple(names or ()) + + def bind(self, **kwargs): + binding = _FakeBinding() + binding.names = self.names + binding.kwargs = kwargs + return binding + + +class _FakeArtifact: + def subset(self, names): + return _FakeArtifactView(self, names) + + +class _FakeBinding: + def __init__(self): + self.tensors = {"w": 
torch.ones((1,), dtype=torch.float16)} + self.binding_layout_id = "layout-1" + self.realized = None + self.swapped = None + self.closed = False + + def realize_from(self, source_view, *, realization_plan, options): + self.realized = (source_view, realization_plan, options) + return "epoch-1" + + def swap(self, artifact, **kwargs): + self.swapped = (artifact, kwargs) + self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)} + return self + + def freeze_current(self, *, update_epoch, source_artifact_ref): + return SimpleNamespace( + binding_id="binding-1", + binding_layout_id=self.binding_layout_id, + binding_value_id="value-1", + seal_generation=1, + update_epoch=update_epoch, + source_artifact_ref=source_artifact_ref, + local_serving_ref="binding-local:fake", + ) + + def close(self): + self.closed = True + + +class _FakeRestoredRetainedBinding: + def __init__(self): + self.tensors = {"w": torch.ones((1,), dtype=torch.float16)} + self.binding_layout_id = "layout-1" + self.binding_value_ref = SimpleNamespace( + binding_id="binding-1", + binding_layout_id="layout-1", + binding_value_id="value-1", + seal_generation=1, + ) + self.reservation_bytes = 4096 + self.closed = False + self.transferred = False + + def transfer_to_runtime(self): + self.transferred = True + return SimpleNamespace(close=lambda: None) + + def close(self): + self.closed = True + + +def _retained_authority() -> ParsedRetainedRealizationAuthority: + member = RuntimeBindingMemberRef( + member_id="member-0", + member_index=0, + member_count=1, + group_id="group-1", + ) + binding_ref = BindingValueRef( + binding_id="binding-1", + binding_layout_id="layout-1", + binding_value_id="value-1", + seal_generation=1, + ) + capability = BindingReservationCapability( + capability_id="capability-1", + binding_value_ref=binding_ref, + daemon_id="daemon-1", + daemon_session_id="session-1", + device_uuid="gpu-0", + member=member, + reservation_bytes=4096, + scope_digest="scope-1", + ) + return 
ParsedRetainedRealizationAuthority( + group_id="group-1", + local_serving_ref="binding-local:fake", + binding_value_ref=binding_ref, + reservation_capability=capability, + daemon_id="daemon-1", + daemon_session_id="session-1", + device_uuid="gpu-0", + member=member, + reservation_bytes=4096, + expected=RetainedRealizationExpectedDigests( + target_layout_hash="layout-hash", + tensor_schema_hash="fake-schema", + runtime_build_digest="build-digest", + resolved_spec_digest="spec-digest", + ), + readiness="runtime_local_ready", + verification_state="local_only", + ) + + +class _FakeSource: + def subset(self, names): + return ("subset", tuple(names)) + + +class _FakeRuntimeModel: + def __init__(self): + self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")} + + +class _FakeFrameworkHost: + def identity(self, model_config): + del model_config + return FrameworkIdentity( + framework_name="fakefw", + framework_version="fakefw-v1", + adapter_version="adapter-v1", + serving_abi_version="abi-v1", + ) + + def prepare_model_construction(self, framework_config, model_config): + del framework_config, model_config + + def build_meta_model(self, framework_config, model_config): + del framework_config, model_config + return _FakeRuntimeModel() + + def build_runtime_model(self, framework_config, model_config, target_device): + del framework_config, model_config, target_device + return _FakeRuntimeModel() + + def assert_model_ready_for_runtime_binding(self, model, *, context): + del context + assert "w" in model.tensors + + def semantic_probes(self, model, model_config): + del model, model_config + return {} + + +class _FakePlacementHost: + def identity_facts(self, framework_config): + del framework_config + return PlacementIdentityFacts( + tensor_parallel_rank=0, + tensor_parallel_size=1, + pipeline_parallel_rank=0, + pipeline_parallel_size=1, + data_parallel_rank=0, + data_parallel_size=1, + ) + + def admission_facts(self, framework_config): + del 
framework_config + return PlacementAdmissionFacts() + + def member_facts(self, framework_config): + del framework_config + return PlacementMemberFacts( + runtime_rank=0, + runtime_world_size=1, + member_id="member-0", + member_index=0, + member_count=1, + group_id_hint="group-1", + ) + + def execution_facts(self, framework_config): + del framework_config + return MaterializationExecutionFacts( + collective_rank=0, + collective_world_size=1, + tensor_parallel_ranks=(0,), + ) + + +class _FakeTensorSurface: + def runtime_only_tensor_names(self, model): + del model + return () + + def align_runtime_tensor_names(self, model, expected_names): + assert set(expected_names) == set(model.tensors) + return 0 + + def collect_runtime_tensors(self, model, *, remove_duplicate=False): + del remove_duplicate + return dict(model.tensors) + + def collect_runtime_tensor_view(self, tensors): + del tensors + return () + + def compute_runtime_tensor_schema_hash(self, tensors, *, remove_duplicate=False): + del tensors, remove_duplicate + return "fake-schema" + + def attach_bound_tensors(self, model, tensors, *, replace_meta_params): + del replace_meta_params + model.tensors.update(tensors) + return model + + def allocate_runtime_only_tensors(self, model, target_device): + del model, target_device + return {} + + def snapshot_tensor_invariants(self, tensors): + return tuple(sorted(tensors)) + + def validate_tensor_invariants(self, before, after): + assert before == tuple(sorted(after)) + + +def _realization_plan_proto(): + from tensorcast.proto.daemon.v2 import store_daemon_pb2 + + plan = store_daemon_pb2.BindingRealizationPlan() + entry = plan.entries.add(dst_name="w") + entry.op_kind = store_daemon_pb2.BINDING_REALIZATION_OP_KIND_COPY + entry.source_name = "w" + return plan.SerializeToString(deterministic=True) + + +def _recipe(source_artifact_ref="mi2:source"): + return CompiledRuntimeRecipe( + compile_key="compile", + source_artifact_ref=source_artifact_ref, + 
source_metadata_fingerprint="meta", + runtime_facts=TensorcastRuntimeFacts( + framework_name="fakefw", + framework_version="fakefw-v1", + adapter_version="adapter-v1", + serving_abi_version="abi-v1", + support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, + runtime_only_tensor_names=(), + process_after_load_class=FinalizeClass.RUNTIME_ONLY, + post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY, + ), + trace_plan=TracePlan( + copy_plan=[], + expected_src_names={"w"}, + expected_dst_names={"w"}, + tensorcast_slices={}, + src_hull={}, + ), + tensor_schema=( + TensorSchemaEntry( + name="w", + dtype="torch.float16", + shape=(1,), + stride=(1,), + ), + ), + source_hull=(), + realization_plan=(), + realization_fallback_plan=(), + topology_ref=None, + member_ref=None, + semantic_validation_spec=TensorcastSemanticValidationSpec.empty(), + realization_plan_proto=_realization_plan_proto(), + realization_plan_count=1, + ) + + +def test_fake_second_framework_core_generated_ids_are_framework_neutral(): + group_id = integration_mod.build_collective_group_id( + artifact_ref="mi2:fake:serving", + operation_scope="fakefw.realize", + tp_ranks=(0, 1), + contract_identity="repr", + ) + assert group_id.startswith("tensorcast-") + assert "vllm" not in group_id + + _contract_hash, manifest_bytes = ( + local_ready_mod.prepare_same_binding_manifest_carrier( + _recipe(), + manifest_tensor_name="__tensorcast_meta__.manifest", + representation_contract_hash="repr", + topology_admission_digest="topology-digest", + ) + ) + manifest = RuntimeArtifactManifest.from_bytes(manifest_bytes) + lower_manifest = manifest_bytes.lower() + assert integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION == ( + "tensorcast-bootstrap-v1" + ) + assert manifest.topology_admission_digest == "topology-digest" + assert ( + integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION.encode() + in manifest_bytes + ) + assert b"vllm" not in lower_manifest + + +def 
test_fake_second_framework_uses_host_intent_lifecycle(monkeypatch): + identity = RuntimeBindingPlan( + model_hash="hash", + model_id="fake-model", + model_revision=None, + dtype="torch.float16", + runtime_version="fake-runtime-v1", + framework_name="fakefw", + framework_version="fakefw-v1", + adapter_version="adapter-v1", + serving_abi_version="abi-v1", + trace_cache_schema_version=1, + tp_rank=0, + tp_world_size=1, + ) + session = RecipeBuildSession(identity) + assert session.recipe_cache_key(metadata_fingerprint="meta") + + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + monkeypatch.setattr( + ArtifactRuntimeIntegration, + "build_materialization_options", + lambda self, **kwargs: ("realize-options", kwargs), + ) + direct_resolve_calls = [] + + class _FakeResolver: + def resolve(self, artifact_ref): + direct_resolve_calls.append(("resolve", artifact_ref)) + return SimpleNamespace( + artifact=_FakeArtifact(), + artifact_ref=artifact_ref, + tensor_names=("w",), + manifest=SimpleNamespace( + representation_contract_hash="repr-direct", + source_artifact_ref="mi2:source", + serving_build_digest="build-direct", + ), + ) + + def cross_check(self, resolved_artifact, **kwargs): + direct_resolve_calls.append(("cross_check", kwargs)) + return resolved_artifact + + host = IntegrationHost( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + ) + direct_attachment = ArtifactRuntimeIntegration( + resolver=_FakeResolver(), + host=host, + ).start( + ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")), + RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + target_device=torch.device("cuda:0"), + ), + ) + direct_payload = 
direct_attachment.view.endpoint.to_weight_version_payload() + assert direct_attachment.state.runtime_view.readiness == "runtime_ready" + assert direct_payload["serving_artifact_ref"] == "mi2:serving" + assert direct_payload["source_artifact_ref"] == "mi2:source" + assert direct_resolve_calls[1][1]["expected_tensor_schema_hash"] == "fake-schema" + reload_attachment = ArtifactRuntimeIntegration( + resolver=_FakeResolver(), + host=host, + ).reload( + direct_attachment.state, + ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving-next")), + RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + model=direct_attachment.model, + ) + reload_payload = reload_attachment.view.endpoint.to_weight_version_payload() + reload_response = reload_attachment.view.endpoint.to_reload_response_payload() + assert reload_payload["serving_artifact_ref"] == "mi2:serving-next" + assert reload_response == { + "schema_version": 1, + "serving_artifact_ref": "mi2:serving-next", + "representation_contract_hash": "repr-direct", + "serving_build_digest": "build-direct", + "readiness": "runtime_ready", + } + assert direct_attachment.state.binding.swapped[1]["options"] == "realize-options" + described = ArtifactRuntimeIntegration(host=host).describe(reload_attachment.state) + assert ( + described.endpoint.to_weight_version_payload()["serving_artifact_ref"] + == "mi2:serving-next" + ) + + host_binding = _FakeBinding() + host_model = _FakeRuntimeModel() + attachment = ArtifactRuntimeIntegration(host=host).start( + AdminLocalSourceBootstrap( + source_selector=SourceSelector.local_path("/tmp/fake-model"), + bootstrap_policy=BootstrapPolicy(), + recipe=_recipe(), + source_subject=_FakeSource(), + source_artifact_ref="mi2:source", + model=host_model, + binding_factory=lambda *args, **kwargs: host_binding, + ), + RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + 
target_device=torch.device("cuda:0"), + ), + ) + assert attachment.model is host_model + assert attachment.state.runtime_view.readiness == "runtime_local_ready" + payload = attachment.view.endpoint.to_weight_version_payload() + assert payload["source_artifact_ref"] == "mi2:source" + assert payload["family"] == "generic" + assert payload["tp_rank"] == 0 + assert attachment.prepared is not None + assert host_binding.realized is not None + assert host_binding.realized[2] == "realize-options" + + retained_calls = [] + restored = _FakeRestoredRetainedBinding() + + @contextmanager + def fake_restore_retained(**kwargs): + retained_calls.append(kwargs) + yield restored + + monkeypatch.setattr( + integration_mod, "restore_retained_binding", fake_restore_retained + ) + retained_attachment = ArtifactRuntimeIntegration(host=host).start( + RetainedBindingAcquire(authority=_retained_authority()), + RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + target_device=torch.device("cuda:0"), + ), + ) + retained_payload = retained_attachment.view.endpoint.to_weight_version_payload() + assert retained_attachment.state.runtime_view.readiness == "runtime_local_ready" + assert retained_payload["local_serving_ref"] == "binding-local:fake" + assert retained_payload["binding_value_ref"]["binding_value_id"] == "value-1" + assert retained_calls[0]["expected_member"].member_index == 0 + assert restored.transferred + + +def test_artifact_realize_model_runtime_uses_direct_runtime_host(monkeypatch): + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + materialization_calls = [] + + def build_materialization_options(_self, **kwargs): + materialization_calls.append(kwargs) + return "realize-options", kwargs + + monkeypatch.setattr( + ArtifactRuntimeIntegration, + 
"build_materialization_options", + build_materialization_options, + ) + + class _RecordingArtifactView: + def __init__(self, parent, names): + self.parent = parent + self.names = tuple(names) + + def bind(self, **kwargs): + binding = _FakeBinding() + binding.last_materialization_diagnostics = { + "source": "p2p", + "operation_id": "op-direct", + "total_bytes": 2, + "retry_reason_buckets": {"none": 0}, + "ipc_open_sec": 0.001, + "restore_tensors_sec": 0.002, + } + binding.last_execution_diagnostics = SimpleNamespace( + actual_collective_committed_bytes=0, + actual_local_typed_bytes=2, + actual_generic_backend_bytes=0, + fallback_bytes=0, + residual_bytes=0, + direct_write_supported=True, + dominant_executor="local_typed", + ) + self.parent.bind_calls.append((self.names, kwargs, binding)) + return binding + + def tensor_dict(self, **_kwargs): + raise AssertionError("direct model-runtime path must not use TensorDict") + + def tensor_dict_with_diagnostics(self, **_kwargs): + raise AssertionError("direct model-runtime path must not use TensorDict") + + def tensor_dict_into(self, *_args, **_kwargs): + raise AssertionError("direct model-runtime path must not use TensorDict") + + def state_dict(self): + raise AssertionError("direct model-runtime path must not build state dict") + + class _RecordingArtifact: + def __init__(self): + self.bind_calls = [] + + def subset(self, names): + return _RecordingArtifactView(self, names) + + def tensor_dict(self, **_kwargs): + raise AssertionError("direct model-runtime path must not use TensorDict") + + def state_dict(self): + raise AssertionError("direct model-runtime path must not build state dict") + + resolved_artifact = _RecordingArtifact() + resolver_calls = [] + + class _Resolver: + def resolve(self, artifact_ref): + resolver_calls.append(("resolve", artifact_ref)) + return SimpleNamespace( + artifact=resolved_artifact, + artifact_ref=artifact_ref, + tensor_names=("w",), + manifest=SimpleNamespace( + 
representation_contract_hash="repr-direct", + source_artifact_ref="mi2:source", + serving_build_digest="build-direct", + ), + ) + + def cross_check(self, resolved, **kwargs): + resolver_calls.append(("cross_check", kwargs)) + return resolved + + class _Store: + pass + + def reject_runtime_session(*_args, **_kwargs): + raise AssertionError("direct model-runtime path must not start a session") + + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "from_config", + classmethod(reject_runtime_session), + ) + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "start", + reject_runtime_session, + ) + + artifact = Artifact( + store_ref=weakref.ref(_Store()), + artifact_id="mi2:serving", + canonical_index_bytes=b"index", + ) + host = tc.RuntimeHostCapabilities( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + ) + profile_events = [] + + handle = artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + adapter_version="adapter-v1", + runtime_abi_version="abi-v1", + ), + runtime_host=host, + runtime_context=RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + runtime_resolver=_Resolver(), + profile_sink=profile_events.append, + ) + attachment = handle.attachment() + + assert handle.attach() is attachment + assert attachment.state.model_runtime_handle is handle + assert handle.report.target_kind == "model_runtime" + assert handle.report.artifact_id == "mi2:serving" + assert handle.report.artifact_profile == "durable_artifact" + assert handle.report.authority_scope == "daemon_mediated_durable" + assert handle.report.source_selection_digest + assert handle.report.model_runtime is not None + assert handle.report.model_runtime.framework == "fakefw" + assert handle.report.model_runtime.adapter_version == "adapter-v1" + assert handle.report.model_runtime.runtime_abi_version 
== "abi-v1" + assert handle.report.runtime_attach_sec is not None + assert handle.report.runtime_attach_sec >= 0.0 + assert handle.report.runtime_finalize_sec is not None + assert handle.report.runtime_finalize_sec >= 0.0 + assert handle.report.total_sec is not None + assert handle.report.total_sec >= handle.report.runtime_attach_sec + assert ( + attachment.state.realization_handle.report.target_kind == "runtime_attachment" + ) + assert attachment.state.realization_handle.report.runtime_attach_sec == ( + handle.report.runtime_attach_sec + ) + assert attachment.state.realization_handle.report.runtime_finalize_sec == ( + handle.report.runtime_finalize_sec + ) + assert torch.equal( + attachment.model.tensors["w"], torch.ones((1,), dtype=torch.float16) + ) + assert resolved_artifact.bind_calls + bind_names, bind_kwargs, _binding = resolved_artifact.bind_calls[0] + assert bind_names == ("w",) + assert bind_kwargs["device"] == torch.device("cuda:0") + assert bind_kwargs["options"] == "realize-options" + assert len(materialization_calls) == 1 + assert materialization_calls[0]["artifact_ref"] == "mi2:serving" + assert ( + materialization_calls[0]["operation_scope"] + == "startup.direct_artifact_runtime.bind" + ) + assert materialization_calls[0][ + "source_bound_contract_state" + ].source_bound_contract_ready + assert handle.report.source == "p2p" + assert handle.report.operation_id == "op-direct" + assert handle.report.materialization_diagnostics["ipc_open_sec"] == 0.001 + assert handle.report.execution_commit is not None + assert handle.report.execution_commit.actual_executor_path == "local_typed" + assert handle.report.execution_commit.direct_write_bytes == 2 + assert handle.report.execution_commit.fallback_bytes == 0 + assert handle.report.envelope.copy_bytes == 0 + assert handle.report.envelope.temporary_replica_bytes == 0 + assert handle.report.envelope.retained_bytes == 0 + assert handle.report.envelope.cuda_ipc_open_count == 0 + assert [event["event"] for event in 
profile_events] == [ + "runtime_materialization.attach.start", + "runtime_materialization.attach.done", + ] + assert resolver_calls[0] == ("resolve", "mi2:serving") + assert resolver_calls[1][0] == "cross_check" + + serving_attachment = ArtifactRuntimeIntegration( + resolver=_Resolver(), + host=host, + ).start( + ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")), + RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + target_device=torch.device("cuda:0"), + ), + ) + serving_handle = serving_attachment.state.model_runtime_handle + assert serving_handle.attach() is serving_attachment + assert serving_handle.report.target_kind == handle.report.target_kind + assert serving_handle.report.operation_backend == handle.report.operation_backend + assert serving_handle.report.envelope == handle.report.envelope + assert serving_handle.report.target_plan == handle.report.target_plan + assert serving_handle.report.model_runtime == handle.report.model_runtime + assert serving_handle.release_contract.release_policy == ( + handle.release_contract.release_policy + ) + assert serving_handle.release_contract.release_strictness == ( + handle.release_contract.release_strictness + ) + + +def test_artifact_realize_model_runtime_uses_same_store_when_resolver_omitted( + monkeypatch, +): + import tensorcast.api.store as store_api + import tensorcast.artifact_runtime.artifact.resolver as resolver_mod + + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + monkeypatch.setattr( + store_api, + "artifact", + lambda *args, **kwargs: (_ for _ in ()).throw( + AssertionError("direct model-runtime resolver must use artifact store") + ), + ) + monkeypatch.setattr( + resolver_mod.tc_artifact_manifest, + "read_runtime_artifact_manifest_tensor", + 
lambda *_args, **_kwargs: SimpleNamespace( + representation_contract_hash="repr-direct", + source_artifact_ref="mi2:source", + serving_build_digest="build-direct", + local_serving_ref=None, + ), + ) + manifest_cross_checks = [] + monkeypatch.setattr( + resolver_mod.tc_artifact_manifest, + "cross_check_runtime_artifact_manifest", + lambda **kwargs: manifest_cross_checks.append(kwargs), + ) + + class _StoreArtifactView: + def __init__(self, parent, names): + self.parent = parent + self.names = tuple(names) + + def bind(self, **kwargs): + binding = _FakeBinding() + self.parent.bind_calls.append((self.names, kwargs, binding)) + return binding + + class _StoreArtifact: + def __init__(self): + self.bind_calls = [] + self.descriptor = SimpleNamespace( + artifact_id="mi2:serving", + tensor_names=("w", tc.SERVING_MANIFEST_TENSOR_NAME), + tensor_metas={ + "w": SimpleNamespace( + shape=(1,), + dtype=torch.float16, + stride=(1,), + storage_offset=0, + size_bytes=2, + ) + }, + total_bytes=2, + ) + + def describe(self): + return self.descriptor + + def subset(self, names): + return _StoreArtifactView(self, names) + + opened_artifact = _StoreArtifact() + store_calls = [] + + class _Store: + closed = False + _runtime = object() + _materialization = object() + + def artifact(self, **kwargs): + store_calls.append(kwargs) + return opened_artifact + + store = _Store() + host = tc.RuntimeHostCapabilities( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + ) + materialization_options = tc.GetArtifactOptions() + artifact = Artifact( + store_ref=weakref.ref(store), + artifact_id="mi2:serving", + ) + + handle = artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + adapter_version="adapter-v1", + runtime_abi_version="abi-v1", + options=materialization_options, + ), + runtime_host=host, + runtime_context=RequestContext( + framework_config=SimpleNamespace(), + 
model_config=SimpleNamespace(model="fake-model"), + ), + runtime_resolver=None, + ) + + assert handle.report.target_kind == "model_runtime" + assert store_calls == [{"ref": "mi2:serving"}] + assert manifest_cross_checks + bind_names, bind_kwargs, _binding = opened_artifact.bind_calls[0] + assert bind_names == ("w",) + assert bind_kwargs["options"] is materialization_options + + +def test_model_runtime_rejects_spec_context_device_mismatch(): + class _Store: + pass + + artifact = Artifact( + store_ref=weakref.ref(_Store()), + artifact_id="mi2:serving", + ) + + with pytest.raises(tc.ArtifactError) as exc_info: + artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + ), + runtime_host=object(), + runtime_context=RequestContext(target_device=torch.device("cuda:1")), + ) + + assert exc_info.value.status_code == "INVALID_ARGUMENT" + assert "target_device facts disagree" in str(exc_info.value) + + +def test_model_runtime_options_and_runtime_artifact_policy_are_separate( + monkeypatch, +): + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + host = tc.RuntimeHostCapabilities( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + ) + + class _RecordingArtifactView: + def __init__(self, parent, names): + self.parent = parent + self.names = tuple(names) + + def bind(self, **kwargs): + binding = _FakeBinding() + self.parent.bind_calls.append((self.names, kwargs, binding)) + return binding + + class _RecordingArtifact: + def __init__(self): + self.bind_calls = [] + + def subset(self, names): + return _RecordingArtifactView(self, names) + + class _Resolver: + def __init__(self): + self.cross_checks = [] + self.artifact = _RecordingArtifact() + + def resolve(self, artifact_ref): + 
return SimpleNamespace( + artifact=self.artifact, + artifact_ref=artifact_ref, + tensor_names=("w",), + manifest=SimpleNamespace( + representation_contract_hash="repr-direct", + source_artifact_ref="mi2:source", + serving_build_digest="build-direct", + ), + ) + + def cross_check(self, resolved_artifact, **kwargs): + self.cross_checks.append(kwargs) + return resolved_artifact + + def realize_with(runtime_artifact_policy=None): + resolver = _Resolver() + materialization_options = tc.GetArtifactOptions() + + class _Store: + pass + + artifact = Artifact( + store_ref=weakref.ref(_Store()), + artifact_id="mi2:serving", + ) + handle = artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + adapter_version="adapter-v1", + runtime_abi_version="abi-v1", + options=materialization_options, + runtime_artifact_policy=runtime_artifact_policy, + ), + runtime_host=host, + runtime_context=RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + runtime_resolver=resolver, + ) + assert handle.report.target_kind == "model_runtime" + return resolver, materialization_options + + resolver, materialization_options = realize_with() + policy_seen = resolver.cross_checks[0]["runtime_artifact_policy"] + assert policy_seen is not materialization_options + assert resolver.artifact.bind_calls[0][1]["options"] is materialization_options + + runtime_policy = tc.RuntimeArtifactPolicy( + expected_representation_contract_hash="repr-direct", + ) + resolver, materialization_options = realize_with(runtime_policy) + policy_seen = resolver.cross_checks[0]["runtime_artifact_policy"] + assert policy_seen.expected_representation_contract_hash == "repr-direct" + bind_kwargs = resolver.artifact.bind_calls[0][1] + assert bind_kwargs["runtime_artifact_policy"].expected_representation_contract_hash + assert bind_kwargs["options"] is materialization_options + + +def 
test_artifact_realize_model_runtime_uses_local_ready_restore(monkeypatch): + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + + def reject_runtime_session(*_args, **_kwargs): + raise AssertionError("direct model-runtime path must not start a session") + + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "from_config", + classmethod(reject_runtime_session), + ) + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "start", + reject_runtime_session, + ) + + class _NoMaterializeArtifact: + def subset(self, _names): + raise AssertionError( + "local-ready direct path must not bind source artifact" + ) + + def tensor_dict(self, **_kwargs): + raise AssertionError("local-ready direct path must not use TensorDict") + + def state_dict(self): + raise AssertionError("local-ready direct path must not build state dict") + + restored = _FakeRestoredRetainedBinding() + restore_calls = [] + + @contextmanager + def fake_restore_prepared(**kwargs): + restore_calls.append(kwargs) + assert kwargs["expected_member"].member_id == "member-0" + yield restored + + monkeypatch.setattr( + integration_mod, + "restore_prepared_local_ready_binding", + fake_restore_prepared, + ) + + class _Resolver: + def resolve(self, artifact_ref): + return SimpleNamespace( + artifact=_NoMaterializeArtifact(), + artifact_ref=artifact_ref, + tensor_names=("w",), + manifest=SimpleNamespace( + representation_contract_hash="repr-local", + source_artifact_ref="mi2:source", + serving_build_digest="build-local", + local_serving_ref="binding-local:binding-1:value-1", + ), + ) + + def cross_check(self, resolved, **_kwargs): + return resolved + + class _Store: + pass + + artifact = Artifact( + store_ref=weakref.ref(_Store()), + artifact_id="mi2:serving-local", + canonical_index_bytes=b"index", + ) + handle = 
artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + ), + runtime_host=tc.RuntimeHostCapabilities( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + ), + runtime_context=RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + runtime_resolver=_Resolver(), + ) + + attachment = handle.attachment() + assert attachment.state.runtime_view.readiness == "runtime_local_ready" + assert attachment.state.runtime_view.local_serving_ref == ( + "binding-local:binding-1:value-1" + ) + assert torch.equal( + attachment.model.tensors["w"], torch.ones((1,), dtype=torch.float16) + ) + assert restored.transferred + assert not restored.closed + assert restore_calls + assert restore_calls[0]["resolved_artifact"].artifact_ref == "mi2:serving-local" + assert handle.report.artifact_id == "mi2:serving-local" + assert handle.report.artifact_profile == "durable_artifact" + assert handle.report.authority_scope == "daemon_mediated_durable" + assert handle.report.lifecycle_plan is not None + assert handle.report.lifecycle_plan.retained is True + assert handle.report.runtime_attach_sec is not None + assert handle.report.runtime_attach_sec >= 0.0 + assert handle.report.runtime_finalize_sec is not None + assert handle.report.runtime_finalize_sec >= 0.0 + assert handle.report.total_sec is not None + assert handle.report.total_sec >= handle.report.runtime_attach_sec + assert handle.report.envelope.retained_bytes == restored.reservation_bytes + assert handle.report.envelope.release_policy == ( + "close_runtime_attachment", + "release_placement_lease", + ) + + +def test_artifact_realize_model_runtime_uses_mounted_source_artifact(monkeypatch): + source_artifact_ref = "msa1:test-source" + calls = [] + host_binding = _FakeBinding() + + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: 
SimpleNamespace( + source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + monkeypatch.setattr( + ArtifactRuntimeIntegration, + "resolve_source_subject", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + AssertionError("mounted-source artifact already owns the source subject") + ), + ) + monkeypatch.setattr( + ArtifactRuntimeIntegration, + "build_materialization_options", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + AssertionError("mounted-source model_runtime must preserve spec options") + ), + ) + + class _Provider: + def build_catalog(self, request): + calls.append(("catalog", request)) + return SimpleNamespace( + source_artifact_ref=request.source_artifact_ref, + metadata_fingerprint="meta", + ordered_names=("w",), + meta_by_name={}, + selected_files=(), + ) + + class _RecipeSession: + def build_recipe(self, **kwargs): + calls.append(("recipe", kwargs)) + return SimpleNamespace( + recipe=_recipe(source_artifact_ref=source_artifact_ref), + diagnostics={"compile_key": "compile"}, + ) + + monkeypatch.setattr( + ArtifactRuntimeIntegration, + "build_recipe_session", + lambda self, request: calls.append(("session", request)) or _RecipeSession(), + ) + + def fake_realize_local_ready_binding_from_source(**kwargs): + calls.append(("prepare", kwargs)) + update_epoch = host_binding.realize_from( + kwargs["source_subject"], + realization_plan=kwargs["recipe"].realization_plan_proto, + options=kwargs["options"], + ) + return SimpleNamespace( + binding=host_binding, + update_epoch=update_epoch, + layout=SimpleNamespace(binding_layout_id="layout-1"), + realization_entry_count=1, + ) + + monkeypatch.setattr( + local_ready_mod, + "realize_local_ready_binding_from_source", + fake_realize_local_ready_binding_from_source, + ) + + class _Store: + pass + + source_handle = tc.PublicDiskSourceHandle( + path="/tmp/fake-model", + canonical_index_bytes=b"index", + artifact_id=source_artifact_ref, + 
generation=1, + ) + artifact = Artifact( + store_ref=weakref.ref(_Store()), + artifact_id=source_artifact_ref, + source_subject=source_handle, + ) + host = tc.RuntimeHostCapabilities( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + source_catalog=_Provider(), + ) + materialization_options = tc.GetArtifactOptions() + + handle = artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + adapter_version="adapter-v1", + options=materialization_options, + ), + runtime_host=host, + runtime_context=RequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + ) + attachment = handle.attachment() + + assert attachment.state.runtime_view.readiness == "runtime_local_ready" + assert attachment.state.runtime_view.source_artifact_ref == source_artifact_ref + assert handle.report.target_kind == "model_runtime" + assert handle.report.model_runtime.framework == "fakefw" + assert handle.report.model_runtime.adapter_version == "adapter-v1" + assert handle.report.artifact_profile == "mounted_source" + assert handle.report.authority_scope == "daemon_local_mounted_source" + assert handle.report.logical_layout_hash + assert calls[0][0] == "catalog" + assert calls[0][1].source_selector == SourceSelector.local_path("/tmp/fake-model") + assert calls[0][1].source_subject.subject is source_handle + assert calls[0][1].source_artifact_ref == source_artifact_ref + assert calls[2][0] == "recipe" + assert calls[2][1]["source_catalog"].source_artifact_ref == source_artifact_ref + assert calls[3][0] == "prepare" + assert calls[3][1]["source_subject"] is source_handle + assert host_binding.realized[2] is materialization_options + + +def test_fake_second_framework_uses_direct_artifact_runtime_api(monkeypatch): + monkeypatch.setattr( + integration_mod, + "read_source_bound_contract_state", + lambda: SimpleNamespace( + 
source_bound_contract_ready=True, + source_bound_contract_version=4, + source_bound_capability_names=("collective",), + ), + ) + monkeypatch.setattr( + integration_mod.ArtifactRuntimeIntegration, + "build_materialization_options", + lambda self, **kwargs: ("runtime-options", kwargs), + ) + + def reject_runtime_session(*_args, **_kwargs): + raise AssertionError("second-runtime proof must use artifact runtime API") + + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "from_config", + classmethod(reject_runtime_session), + ) + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "start", + reject_runtime_session, + ) + monkeypatch.setattr( + integration_mod.ArtifactRuntimeSession, + "reload", + reject_runtime_session, + ) + + resolver_calls = [] + + class _Resolver: + def resolve(self, artifact_ref): + resolver_calls.append(("resolve", artifact_ref)) + return SimpleNamespace( + artifact=_FakeArtifact(), + artifact_ref=artifact_ref, + tensor_names=("w",), + manifest=SimpleNamespace( + representation_contract_hash=f"repr:{artifact_ref}", + source_artifact_ref="mi2:source", + serving_build_digest=f"build:{artifact_ref}", + ), + ) + + def cross_check(self, resolved_artifact, **kwargs): + resolver_calls.append(("cross_check", kwargs)) + return resolved_artifact + + host = tc.RuntimeHostCapabilities( + framework=_FakeFrameworkHost(), + placement=_FakePlacementHost(), + tensor_surface=_FakeTensorSurface(), + ) + resolver = _Resolver() + + class _Store: + pass + + artifact = Artifact( + store_ref=weakref.ref(_Store()), + artifact_id="mi2:serving", + ) + + handle = artifact.realize( + tc.ArtifactRealizationSpec.model_runtime( + framework="fakefw", + device=torch.device("cuda:0"), + adapter_version="adapter-v1", + runtime_abi_version="abi-v1", + ), + runtime_host=host, + runtime_context=tc.RuntimeRequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + runtime_resolver=resolver, + ) + attachment 
= handle.attachment() + reloaded = tc.reload_runtime_attachment( + current_attachment=attachment, + artifact_locator=tc.ArtifactLocator.artifact_ref("mi2:serving-next"), + policy=tc.RuntimePolicy(), + runtime_host=host, + runtime_context=tc.RuntimeRequestContext( + framework_config=SimpleNamespace(), + model_config=SimpleNamespace(model="fake-model"), + ), + ensure_runtime_initialized=lambda: None, + model=attachment.model, + runtime_resolver=resolver, + ) + + assert ( + attachment.view.endpoint.to_weight_version_payload()["serving_artifact_ref"] + == "mi2:serving" + ) + assert ( + reloaded.view.endpoint.to_reload_response_payload()["serving_artifact_ref"] + == "mi2:serving-next" + ) + assert handle.report.target_kind == "model_runtime" + assert handle.report.model_runtime.framework == "fakefw" + assert reloaded.state.runtime_view.serving_artifact_ref == "mi2:serving-next" + assert resolver_calls[0] == ("resolve", "mi2:serving") + assert ("resolve", "mi2:serving-next") in resolver_calls + + +def test_fake_second_framework_artifact_runtime_conformance_kit(): + from tensorcast.artifact_runtime.testing import ( + assert_level1_artifact_runtime_conformance, + ) + + result = assert_level1_artifact_runtime_conformance(tc) + + assert result.checks["direct_start"] + assert result.checks["artifact_realization_report"] + assert result.checks["runtime_session_not_required"] + assert result.checks["target_layout_from_runtime_binding"] + assert result.checks["runtime_only_tensors_allocated"] + assert result.checks["runtime_publication_actions"] + assert result.checks["reload"] + assert result.checks["describe"] + assert result.checks["source_capability_not_required"] + assert result.checks["source_catalog_not_required"] + assert result.checks["resolver_uses_artifact_refs"] + assert result.checks["rejects_local_reload_artifact_locator"] + assert result.checks["rejects_untyped_reload_artifact_locator"] + assert result.checks["rejects_untyped_reload_policy"] + + +def 
test_conformance_failure_summary_includes_onboarding_hint(): + from tensorcast.artifact_runtime.testing import ConformanceResult + + result = ConformanceResult( + checks={"direct_start": False}, + messages={"direct_start": "provide a tensor surface"}, + level="level1-runtime", + ) + + try: + result.assert_passed() + except AssertionError as exc: + message = str(exc) + else: + raise AssertionError("expected conformance failure") + + assert "level1-runtime" in message + assert "direct_start" in message + assert "provide a tensor surface" in message diff --git a/tests/python/test_serving_integration.py b/tests/python/artifact_runtime/test_lifecycle.py similarity index 88% rename from tests/python/test_serving_integration.py rename to tests/python/artifact_runtime/test_lifecycle.py index 15c0899b..75db16ba 100644 --- a/tests/python/test_serving_integration.py +++ b/tests/python/artifact_runtime/test_lifecycle.py @@ -2,6 +2,7 @@ from __future__ import annotations +import importlib.util import json from contextlib import contextmanager from dataclasses import fields @@ -11,9 +12,18 @@ import torch from torch import nn -import tensorcast.serving._runtime_impl.lifecycle as integration_mod -from tensorcast.pytorch.module_binding import TorchModuleAdapterMixin -from tensorcast.serving._runtime_impl.lifecycle import ( +import tensorcast.artifact_runtime.contract as contract_mod +import tensorcast.artifact_runtime.lifecycle as integration_mod +import tensorcast.artifact_runtime.recipe.local_ready as local_ready_mod +from tensorcast.artifact_runtime.admin import AdminLocalSourceBootstrap +from tensorcast.artifact_runtime.config import TensorCastRuntimeConfig +from tensorcast.artifact_runtime.contract import logical_topology_json +from tensorcast.artifact_runtime.diagnostics import ( + binding_layout_debug_payload, + binding_layout_profile_fields, + binding_layout_tensor_count, +) +from tensorcast.artifact_runtime.lifecycle import ( PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION, 
PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION, RECIPE_CACHE_POLICY_SCHEMA_VERSION, @@ -25,13 +35,17 @@ AdmissionRejectedError, AdmissionRequest, ArtifactLocatorResolutionError, + ArtifactRuntimeIntegration, + ArtifactRuntimeIntegrationError, + ArtifactRuntimeNotImplementedError, + ArtifactRuntimeSession, AttachFinalizeError, AuthorityValidationError, BootstrapPolicy, CapabilityMissingError, ConfigConflictError, DefaultAdmissionPolicy, - ExistingServingArtifact, + ExistingRuntimeArtifact, FinalizeClass, FrameworkIdentity, IntegrationHost, @@ -53,24 +67,18 @@ RetainedBindingAcquire, RuntimeAttachment, RuntimeBindingMaterialization, + RuntimeBindingPlan, RuntimeBindingResult, RuntimeBindingState, RuntimeBindingView, + RuntimeLoadResult, + RuntimePlacement, RuntimeProfile, + RuntimeReloadResult, RuntimeStateSeed, + RuntimeSupportLevel, RuntimeWorkerView, SchemaMismatchError, - ServingArtifactLocator, - ServingBindingPlan, - ServingConfig, - ServingIntegration, - ServingIntegrationError, - ServingIntegrationNotImplementedError, - ServingLoadResult, - ServingPlacement, - ServingReloadResult, - ServingRuntimeSession, - ServingSupportLevel, SourceCatalogRequest, SourceDownloadPolicy, SourceProviderError, @@ -78,55 +86,57 @@ SourceSubject, TensorcastSemanticValidationSpec, TensorSchemaEntry, - _DirectServingLoad, + _DirectRuntimeLoad, _LocalReadyBootstrap, _LocalReadyFinalize, _RetainedBindingAcquire, - _ServingReload, - bind_serving_artifact, + _RuntimeReload, + bind_runtime_artifact, build_local_ready_prepared_artifact, is_runtime_binding_swap_capable, local_ready_current_value_summary_fields, restore_prepared_local_ready_binding, restore_retained_binding, runtime_binding_state_from_runtime_view, - serving_placement_from_framework_facts, + runtime_placement_from_framework_facts, source_selection_projection_from_artifact_realization_report, source_selection_projection_from_execution_diagnostics, source_selection_projection_from_materialization_diagnostics, 
source_subject_broadcast_payload, source_subject_from_broadcast_payload, - swap_serving_artifact, + swap_runtime_artifact, ) -from tensorcast.serving._runtime_impl.lifecycle import ( +from tensorcast.artifact_runtime.lifecycle import ( BindingValueRef as IntegrationBindingValueRef, ) -from tensorcast.serving._runtime_impl.lifecycle import ( - ServingBindingMemberRef as IntegrationServingBindingMemberRef, -) -from tensorcast.serving.admin import AdminLocalSourceBootstrap -from tensorcast.serving.contract import logical_topology_json -from tensorcast.serving.diagnostics import ( - binding_layout_debug_payload, - binding_layout_profile_fields, - binding_layout_tensor_count, +from tensorcast.artifact_runtime.lifecycle import ( + RuntimeBindingMemberRef as IntegrationRuntimeBindingMemberRef, ) -from tensorcast.serving.local_ready import ( +from tensorcast.artifact_runtime.locator import ArtifactLocator +from tensorcast.artifact_runtime.recipe.local_ready import ( canonical_index_entries_from_tensor_schema, logical_topology_json_from_recipe, ) -from tensorcast.serving.retained_binding import ( - ParsedRetainedServingBindingAuthority, - RetainedServingBindingExpectedDigests, +from tensorcast.pytorch.module_binding import TorchModuleAdapterMixin +from tensorcast.retained_realization_authority import ( + ParsedRetainedRealizationAuthority, + RetainedRealizationExpectedDigests, ) from tensorcast.types import ( BindingReservationCapability, BindingValueRef, - ServingBindingMemberRef, - ServingTopologyRef, + RuntimeBindingMemberRef, + RuntimeTopologyRef, ) +def _find_spec_or_none(module_name: str): + try: + return importlib.util.find_spec(module_name) + except ModuleNotFoundError: + return None + + def _profile_records(tmp_path) -> list[dict[str, object]]: return [ json.loads(line) @@ -167,7 +177,7 @@ def _matrix_placement( pp_size: int = 1, dp_size: int = 1, eplb_digest: str | None = None, -) -> ServingPlacement: +) -> RuntimePlacement: framework_payload = { "family": 
"vllm_parallelism", "version": "v1", @@ -200,7 +210,7 @@ def _matrix_placement( eplb_physical_to_logical_digest=eplb_digest, semantic_placement_digests=framework_payload["semantic_placement_digests"], ) - return serving_placement_from_framework_facts( + return runtime_placement_from_framework_facts( identity_facts=PlacementIdentityFacts( tensor_parallel_rank=0, tensor_parallel_size=tp_size, @@ -281,14 +291,12 @@ def test_integration_host_contract_skeleton_and_default_admission(): framework=_ContractFrameworkHost(), placement=_ContractPlacementHost(), ) - service = ServingIntegration(host=host) + service = ArtifactRuntimeIntegration(host=host) assert service.host is host decision = DefaultAdmissionPolicy().admit( AdmissionRequest( - intent=ExistingServingArtifact( - ServingArtifactLocator.artifact_ref("artifact:1") - ), + intent=ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("artifact:1")), framework_identity=FrameworkIdentity( framework_name="fake", framework_version="1", @@ -330,7 +338,7 @@ def identity_payload(self, framework_config): "tp_world_size": 1, } - integration = ServingIntegration( + integration = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_PayloadPlacementHost(), @@ -384,7 +392,7 @@ def test_placement_identity_payload_includes_schema_versions(): expert_parallel_enabled=True, semantic_placement_digests={"expert_mapping": "expert-digest"}, ) - placement = serving_placement_from_framework_facts( + placement = runtime_placement_from_framework_facts( identity_facts=identity, admission_facts=admission, member_facts=PlacementMemberFacts( @@ -410,15 +418,15 @@ def test_placement_identity_payload_includes_schema_versions(): def test_existing_serving_artifact_rejects_local_source_selector(): - service = ServingIntegration() - with pytest.raises(ServingIntegrationError, match="LocalSourceBootstrap"): + service = ArtifactRuntimeIntegration() + with pytest.raises(ArtifactRuntimeIntegrationError, 
match="LocalSourceBootstrap"): service.start( - ExistingServingArtifact(SourceSelector.local_path("/tmp/model")), + ExistingRuntimeArtifact(SourceSelector.local_path("/tmp/model")), RequestContext(), ) - with pytest.raises(ServingIntegrationError, match="local_path"): + with pytest.raises(ArtifactRuntimeIntegrationError, match="local_path"): service.start( - ExistingServingArtifact( + ExistingRuntimeArtifact( { "kind": "local_path", "value": "/tmp/model", @@ -430,10 +438,10 @@ def test_existing_serving_artifact_rejects_local_source_selector(): def test_retained_binding_acquire_rejects_arbitrary_authority_object(): with pytest.raises( - ServingIntegrationError, - match="ParsedRetainedServingBindingAuthority", + ArtifactRuntimeIntegrationError, + match="ParsedRetainedRealizationAuthority", ): - RetainedBindingAcquire(SimpleNamespace(readiness="serving_local_ready")) + RetainedBindingAcquire(SimpleNamespace(readiness="runtime_local_ready")) authority = _authority() assert RetainedBindingAcquire(authority).authority is authority @@ -455,12 +463,12 @@ def test_public_local_source_bootstrap_excludes_admin_override_fields(): ) -def test_serving_runtime_session_plans_direct_start_from_config(monkeypatch): +def test_artifact_runtime_session_plans_direct_start_from_config(monkeypatch): captured = {} state = RuntimeBindingState( runtime_view=RuntimeBindingView( serving_artifact_ref="mi2:serving", - readiness="serving", + readiness="runtime_ready", ) ) attachment = RuntimeAttachment( @@ -480,15 +488,15 @@ def fake_start(self, intent, context): "ensure_initialized", lambda self: captured.setdefault("runtime_initialized", self), ) - monkeypatch.setattr(ServingIntegration, "start", fake_start) + monkeypatch.setattr(ArtifactRuntimeIntegration, "start", fake_start) - session = ServingRuntimeSession.from_config( - ServingConfig.from_mapping( + session = ArtifactRuntimeSession.from_config( + TensorCastRuntimeConfig.from_mapping( { "bootstrap": { "mode": "disabled", }, - "serving": { 
+ "runtime_artifact": { "artifact_locator": { "kind": "artifact_ref", "value": "mi2:serving", @@ -504,17 +512,17 @@ def fake_start(self, intent, context): result = session.start(RequestContext(model_config=object())) assert result is attachment - assert captured["runtime_initialized"] is session.serving_config.runtime - assert isinstance(captured["intent"], ExistingServingArtifact) + assert captured["runtime_initialized"] is session.runtime_config.runtime + assert isinstance(captured["intent"], ExistingRuntimeArtifact) assert captured["intent"].artifact_locator.kind == "artifact_ref" -def test_serving_runtime_session_private_intent_initializes_runtime(monkeypatch): +def test_artifact_runtime_session_private_intent_initializes_runtime(monkeypatch): captured = {} state = RuntimeBindingState( runtime_view=RuntimeBindingView( serving_artifact_ref="mi2:serving", - readiness="serving", + readiness="runtime_ready", ) ) attachment = RuntimeAttachment( @@ -535,13 +543,13 @@ def fake_start(self, intent, context): captured["context"] = context return attachment - monkeypatch.setattr(ServingIntegration, "start", fake_start) - session = ServingRuntimeSession.from_config( + monkeypatch.setattr(ArtifactRuntimeIntegration, "start", fake_start) + session = ArtifactRuntimeSession.from_config( { "bootstrap": { "mode": "disabled", }, - "serving": { + "runtime_artifact": { "artifact_locator": { "kind": "artifact_ref", "value": "mi2:serving", @@ -561,11 +569,11 @@ def fake_start(self, intent, context): result = session._start_intent(intent, RequestContext(model_config=object())) assert result is attachment - assert captured["runtime_initialized"] is session.serving_config.runtime + assert captured["runtime_initialized"] is session.runtime_config.runtime assert captured["intent"] is intent -def test_serving_runtime_session_rejects_conflicting_start_config(monkeypatch): +def test_artifact_runtime_session_rejects_conflicting_start_config(monkeypatch): initialized = False def 
fail_if_initialized(self): @@ -578,13 +586,13 @@ def fail_if_initialized(self): "ensure_initialized", fail_if_initialized, ) - session = ServingRuntimeSession.from_config( - ServingConfig.from_mapping( + session = ArtifactRuntimeSession.from_config( + TensorCastRuntimeConfig.from_mapping( { "bootstrap": { "mode": "required", }, - "serving": { + "runtime_artifact": { "artifact_locator": { "kind": "artifact_ref", "value": "mi2:serving", @@ -603,7 +611,7 @@ def fail_if_initialized(self): assert not initialized -def test_serving_runtime_session_uses_source_host_for_local_bootstrap(monkeypatch): +def test_artifact_runtime_session_uses_source_host_for_local_bootstrap(monkeypatch): captured = {} attachment = RuntimeAttachment( model=object(), @@ -634,10 +642,10 @@ def fake_start(self, intent, context): "ensure_initialized", lambda self: captured.setdefault("runtime_initialized", self), ) - monkeypatch.setattr(ServingIntegration, "start", fake_start) + monkeypatch.setattr(ArtifactRuntimeIntegration, "start", fake_start) - session = ServingRuntimeSession.from_config( - ServingConfig.from_mapping( + session = ArtifactRuntimeSession.from_config( + TensorCastRuntimeConfig.from_mapping( { "bootstrap": { "mode": "required", @@ -658,7 +666,7 @@ def fake_start(self, intent, context): ) assert result is attachment - assert captured["runtime_initialized"] is session.serving_config.runtime + assert captured["runtime_initialized"] is session.runtime_config.runtime assert captured["source_selector_args"][0] == "framework-config" assert isinstance(captured["intent"], LocalSourceBootstrap) assert captured["intent"].source_selector == SourceSelector.local_path( @@ -666,19 +674,19 @@ def fake_start(self, intent, context): ) -def test_serving_runtime_session_rejects_local_reload_artifact_locator(monkeypatch): +def test_artifact_runtime_session_rejects_local_reload_artifact_locator(monkeypatch): monkeypatch.setattr( integration_mod.tc_runtime_config.RuntimeSettings, "ensure_initialized", 
lambda self: pytest.fail("local artifact locator rejection must precede init"), ) - session = ServingRuntimeSession.from_config( - ServingConfig.from_mapping( + session = ArtifactRuntimeSession.from_config( + TensorCastRuntimeConfig.from_mapping( { "bootstrap": { "mode": "disabled", }, - "serving": { + "runtime_artifact": { "artifact_locator": { "kind": "artifact_ref", "value": "mi2:serving", @@ -714,7 +722,7 @@ def test_serving_runtime_session_rejects_local_reload_artifact_locator(monkeypat policy=None, context=RequestContext(), ) - with pytest.raises(ConfigConflictError, match="ServingArtifactLocator"): + with pytest.raises(ConfigConflictError, match="ArtifactLocator"): session.reload( current_attachment=attachment, artifact_locator={ @@ -724,10 +732,10 @@ def test_serving_runtime_session_rejects_local_reload_artifact_locator(monkeypat policy=None, context=RequestContext(), ) - with pytest.raises(ConfigConflictError, match="ServingPolicy"): + with pytest.raises(ConfigConflictError, match="RuntimePolicy"): session.reload( current_attachment=attachment, - artifact_locator=ServingArtifactLocator.artifact_ref("mi2:serving-next"), + artifact_locator=ArtifactLocator.artifact_ref("mi2:serving-next"), policy={"mode": "from_manifest"}, context=RequestContext(), ) @@ -768,14 +776,14 @@ def admit(self, request): endpoint_fields={}, ) - decision = ServingIntegration( + decision = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_MultiDimPlacementHost(), admission=_Admission(), ) )._admit_intent( - ExistingServingArtifact(ServingArtifactLocator.artifact_ref("mi2:serving")), + ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")), RequestContext(framework_config=object(), model_config=object()), ) @@ -791,7 +799,7 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only(): representation_contract_hash="repr", tensor_schema_hash="schema", local_serving_ref="local:ready", - 
readiness="serving_local_ready", + readiness="runtime_local_ready", diagnostics={ "serving_build_digest": "build", "family": "fake-family", @@ -813,7 +821,7 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only(): runtime_view=runtime_view, ) - worker_view = ServingIntegration().describe(state) + worker_view = ArtifactRuntimeIntegration().describe(state) assert isinstance(worker_view, RuntimeWorkerView) payload = worker_view.endpoint.to_weight_version_payload() @@ -823,7 +831,7 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only(): assert payload["representation_contract_hash"] == "repr" assert payload["serving_build_digest"] == "build" assert payload["tensor_schema_hash"] == "schema" - assert payload["readiness"] == "serving_local_ready" + assert payload["readiness"] == "runtime_local_ready" assert payload["family"] == "fake-family" assert payload["tp_rank"] == 1 assert payload["tp_world_size"] == 2 @@ -839,12 +847,12 @@ def test_runtime_worker_view_projection_is_typed_not_diagnostics_only(): assert worker_view.diagnostics["verification_job_id"] == "job-1" -def test_runtime_worker_view_ignores_redundant_source_selection_diagnostics(): +def test_runtime_worker_view_preserves_explicit_source_selection_diagnostics(): runtime_view = RuntimeBindingView( serving_artifact_ref="mi2:serving", representation_contract_hash="repr", tensor_schema_hash="schema", - readiness="serving", + readiness="runtime_ready", diagnostics={ "source_selection": { "selected_source_kind": "canonical_fallback", @@ -857,7 +865,15 @@ def test_runtime_worker_view_ignores_redundant_source_selection_diagnostics(): worker_view = RuntimeWorkerView.from_runtime_view(runtime_view) payload = worker_view.endpoint.to_weight_version_payload() - assert "source_selection" not in payload + assert payload["source_selection"] == { + "schema_version": 1, + "selected_source_kind": "canonical_fallback", + "p2p_bytes": 0, + "fallback_bytes": 2048, + "disk_bytes": 0, + 
"reselection_attempts": 0, + "fallback_reason_bucket": "transport_unavailable", + } def test_source_selection_projection_from_materialization_diagnostics(): @@ -931,7 +947,7 @@ def test_execution_diagnostics_seed_runtime_source_selection_projection(): serving_build_digest="build", ), ) - seed = ServingIntegration._state_seed( + seed = ArtifactRuntimeIntegration._state_seed( resolved, tensor_schema_hash="schema", execution_diagnostics=SimpleNamespace( @@ -985,7 +1001,7 @@ def test_artifact_realization_report_seeds_runtime_source_selection_projection() serving_artifact_ref="mi2:serving", representation_contract_hash="repr", tensor_schema_hash="schema", - readiness="serving", + readiness="runtime_ready", diagnostics={"artifact_realization_report": report}, ) worker_view = RuntimeWorkerView.from_runtime_view(runtime_view) @@ -1023,7 +1039,7 @@ def test_artifact_realization_report_fallback_uses_strategy_and_envelope_facts() serving_artifact_ref="mi2:serving", representation_contract_hash="repr", tensor_schema_hash="schema", - readiness="serving", + readiness="runtime_ready", diagnostics={"artifact_realization_report": report}, ) worker_view = RuntimeWorkerView.from_runtime_view(runtime_view) @@ -1042,7 +1058,7 @@ def test_materialization_diagnostics_seed_runtime_source_selection_projection(): ), ) - seed = ServingIntegration._state_seed( + seed = ArtifactRuntimeIntegration._state_seed( resolved, tensor_schema_hash="schema", materialization_diagnostics={ @@ -1096,7 +1112,7 @@ def test_runtime_binding_result_captures_materialization_diagnostics(): def test_local_bootstrap_requires_host_source_catalog_provider(): - service = ServingIntegration( + service = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_ContractPlacementHost(), @@ -1140,7 +1156,7 @@ def build_catalog(self, request): placement=_ContractPlacementHost(), source_catalog=provider, ) - service = ServingIntegration(host=host) + service = 
ArtifactRuntimeIntegration(host=host) source_subject = SourceSubject( artifact_ref="mi2:source", subject=object(), @@ -1181,7 +1197,7 @@ def build_catalog(request): return SimpleNamespace() return SimpleNamespace(source_artifact_ref=provider_ref) - service = ServingIntegration( + service = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_ContractPlacementHost(), @@ -1193,7 +1209,7 @@ def build_catalog(request): subject=object(), ) - with pytest.raises(ServingIntegrationError, match=expected): + with pytest.raises(ArtifactRuntimeIntegrationError, match=expected): service._local_ready_source_catalog( _LocalReadyBootstrap( source_selector=SourceSelector.local_path("/tmp/model"), @@ -1206,9 +1222,11 @@ def build_catalog(request): def test_local_ready_build_recipe_requires_real_source_subject_artifact_ref(): - service = ServingIntegration() + service = ArtifactRuntimeIntegration() - with pytest.raises(ServingIntegrationError, match="real source artifact identity"): + with pytest.raises( + ArtifactRuntimeIntegrationError, match="real source artifact identity" + ): service._local_ready_prepare_with_built_recipe( _LocalReadyBootstrap( source_selector=SourceSelector.local_path("/tmp/model"), @@ -1268,7 +1286,7 @@ def test_recipe_cache_policy_builds_model_adjacent_cache_config(tmp_path): } ) - config = ServingIntegration._local_ready_recipe_cache_config( + config = ArtifactRuntimeIntegration._local_ready_recipe_cache_config( _LocalReadyBootstrap(cache_config=policy), source_catalog=source_catalog, ) @@ -1308,7 +1326,7 @@ def test_local_source_bootstrap_start_derives_request_from_host(monkeypatch): source_artifact_ref="mi2:source", representation_contract_hash="repr", tensor_schema_hash="schema", - readiness="serving_local_ready", + readiness="runtime_local_ready", ) runtime_state = RuntimeBindingState( binding=object(), @@ -1353,11 +1371,11 @@ def recipe_cache_policy(self, framework_config, model_config): def fake_prepare(self, 
request): del self captured["request"] = request - return integration_mod.LocalReadyServingResult( + return integration_mod.LocalReadyRuntimeResult( model=model, runtime_state=runtime_state, runtime_view=runtime_view, - prepared=integration_mod.PreparedServingArtifact( + prepared=integration_mod.PreparedRuntimeArtifact( source_artifact_ref="mi2:source", serving_manifest_ref="manifest-ref", representation_contract_hash="repr", @@ -1369,10 +1387,10 @@ def fake_prepare(self, request): ) monkeypatch.setattr( - ServingIntegration, "_prepare_local_source_bootstrap", fake_prepare + ArtifactRuntimeIntegration, "_prepare_local_source_bootstrap", fake_prepare ) - attachment = ServingIntegration( + attachment = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_ContractPlacementHost(), @@ -1440,7 +1458,7 @@ def resolve(self, artifact_ref): tensor_names=(), ) - service = ServingIntegration( + service = ArtifactRuntimeIntegration( resolver=_Resolver(), host=IntegrationHost( framework=_ContractFrameworkHost(), @@ -1448,9 +1466,11 @@ def resolve(self, artifact_ref): ), ) - with pytest.raises(ServingIntegrationError, match="TensorSurfaceHost") as exc_info: + with pytest.raises( + ArtifactRuntimeIntegrationError, match="TensorSurfaceHost" + ) as exc_info: service.start( - ExistingServingArtifact(ServingArtifactLocator.artifact_ref("mi2:serving")), + ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:serving")), RequestContext( framework_config=object(), model_config=SimpleNamespace(model="fake"), @@ -1489,7 +1509,7 @@ def cleanup_after_recipe_build( def support_level(self, model, model_config): self.events.append(("support", model, model_config)) - return ServingSupportLevel.RUNTIME_BIND_SWAP_READY + return RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY def process_after_load_class(self, model, model_config): self.events.append(("process_class", model, model_config)) @@ -1500,7 +1520,7 @@ def post_bind_finalize_class(self, model, 
model_config): return FinalizeClass.RUNTIME_ONLY framework = _TraceFrameworkHost() - integration = ServingIntegration( + integration = ArtifactRuntimeIntegration( host=IntegrationHost( framework=framework, placement=_ContractPlacementHost(), @@ -1521,7 +1541,7 @@ def post_bind_finalize_class(self, model, model_config): ) assert ( integration.support_level("model", "model-config") - is ServingSupportLevel.RUNTIME_BIND_SWAP_READY + is RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY ) assert ( integration.process_after_load_class("model", "model-config") @@ -1541,7 +1561,7 @@ def post_bind_finalize_class(self, model, model_config): def test_integration_host_fails_recipe_trace_miss_clearly(): - integration = ServingIntegration( + integration = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_ContractPlacementHost(), @@ -1549,7 +1569,7 @@ def test_integration_host_fails_recipe_trace_miss_clearly(): ) ) - with pytest.raises(ServingIntegrationError, match="RecipeTraceHost"): + with pytest.raises(ArtifactRuntimeIntegrationError, match="RecipeTraceHost"): integration.trace_model_load(object(), (), {}) @@ -1634,7 +1654,7 @@ def test_local_ready_current_value_summary_is_core_owned(): "verification_state": "local_only", "local_serving_ref": "binding-local:binding-1:value-1", } - with pytest.raises(integration_mod.ServingIntegrationError): + with pytest.raises(integration_mod.ArtifactRuntimeIntegrationError): local_ready_current_value_summary_fields( SimpleNamespace(binding_value_id="value-1"), require_local_serving_ref=True, @@ -1699,14 +1719,15 @@ def test_build_local_ready_prepared_artifact_returns_runtime_state_and_view(): assert result.runtime_state.artifact_ref == "mi2:test:source" assert result.runtime_view.source_artifact_ref == "mi2:test:source" assert result.runtime_view.serving_artifact_ref is None - assert result.runtime_view.readiness == "serving_local_ready" + assert result.runtime_view.readiness == "runtime_local_ready" 
assert result.runtime_view.local_serving_ref == ("binding-local:binding-1:value-1") assert result.runtime_view.tensor_schema_hash == "schema" - report = result.runtime_view.diagnostics["serving_realization_report"] + report = result.runtime_view.diagnostics["runtime_realization_report"] + assert result.runtime_view.diagnostics["serving_realization_report"] is report assert report["realization"]["binding_value"]["verification_state"] == "local_only" assert "verification_state" not in result.runtime_view.diagnostics assert result.binding_value is not None - assert result.binding_value.readiness == "serving_local_ready" + assert result.binding_value.readiness == "runtime_local_ready" assert result.binding_value.local_serving_ref == "binding-local:binding-1:value-1" worker_view = RuntimeWorkerView.from_runtime_view(result.runtime_view) payload = worker_view.endpoint.to_weight_version_payload() @@ -1725,28 +1746,28 @@ def test_build_local_ready_prepared_artifact_returns_runtime_state_and_view(): def test_serving_integration_builds_local_ready_manifest_contract_in_core(monkeypatch): calls = [] - integration = ServingIntegration() + integration = ArtifactRuntimeIntegration() recipe = SimpleNamespace(topology_ref=object(), member_ref=object()) monkeypatch.setattr( - integration_mod, + local_ready_mod, "canonical_index_from_recipe", lambda seen_recipe: calls.append(("canonical", seen_recipe)) or "canonical", ) monkeypatch.setattr( - integration_mod, - "compute_serving_tensor_schema_hash", + contract_mod, + "compute_canonical_runtime_tensor_schema_hash", lambda canonical, **kwargs: calls.append(("schema", canonical, kwargs)) or "schema-hash", ) monkeypatch.setattr( - integration_mod, + local_ready_mod, "logical_topology_json_from_recipe", lambda seen_recipe, **kwargs: calls.append(("topology", seen_recipe, kwargs)) or '{"topology": true}', ) monkeypatch.setattr( - integration_mod, + local_ready_mod, "prepare_same_binding_manifest_carrier", lambda seen_recipe, **kwargs: 
calls.append(("carrier", seen_recipe, kwargs)) or ("manifest-ref", b"manifest"), @@ -1797,10 +1818,10 @@ def test_serving_integration_builds_local_ready_manifest_contract_in_core(monkey def test_local_ready_logical_topology_requires_topology_ref(): recipe = SimpleNamespace( - topology_ref=ServingTopologyRef(schema_topology_digest="a") + topology_ref=RuntimeTopologyRef(schema_topology_digest="a") ) - with pytest.raises(ValueError, match="requires ServingTopologyRef"): + with pytest.raises(ValueError, match="requires RuntimeTopologyRef"): logical_topology_json_from_recipe(recipe) @@ -1820,10 +1841,10 @@ def test_serving_integration_builds_local_ready_manifest_from_framework_context( adapter_version=lambda: "adapter-v1", serving_abi_version=lambda _model_config: "abi-v1", ) - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) recipe = SimpleNamespace(topology_ref=object(), member_ref=object()) - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -1837,29 +1858,29 @@ def test_serving_integration_builds_local_ready_manifest_from_framework_context( ) monkeypatch.setattr( - integration_mod, + local_ready_mod, "canonical_index_from_recipe", lambda seen_recipe: calls.append(("canonical", seen_recipe)) or "canonical", ) monkeypatch.setattr( - integration_mod, - "compute_serving_tensor_schema_hash", + contract_mod, + "compute_canonical_runtime_tensor_schema_hash", lambda canonical, **kwargs: calls.append(("schema", canonical, kwargs)) or "schema-hash", ) monkeypatch.setattr( - integration_mod, + contract_mod, "compute_runtime_representation_contract_hash", lambda **kwargs: calls.append(("repr", kwargs)) or "repr-hash", ) monkeypatch.setattr( - integration_mod, + local_ready_mod, "logical_topology_json_from_recipe", lambda seen_recipe, 
**kwargs: calls.append(("topology", seen_recipe, kwargs)) or '{"topology": true}', ) monkeypatch.setattr( - integration_mod, + local_ready_mod, "prepare_same_binding_manifest_carrier", lambda seen_recipe, **kwargs: calls.append(("carrier", seen_recipe, kwargs)) or ("manifest-ref", b"manifest"), @@ -1913,9 +1934,9 @@ def test_serving_integration_prepares_manifest_carrier_result(monkeypatch): adapter_version=lambda: "adapter-v1", serving_abi_version=lambda _model_config: "abi-v1", ) - integration = ServingIntegration(host=_host_for_adapter(adapter)) - placement = ServingPlacement( - topology=ServingTopologyRef( + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -1926,12 +1947,12 @@ def test_serving_integration_prepares_manifest_carrier_result(monkeypatch): carrier_bytes = b"manifest-bytes" monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "build_local_ready_manifest_carrier_from_framework_context", lambda _self, **_kwargs: ("repr-hash", carrier_bytes), ) monkeypatch.setattr( - integration_mod.ServingArtifactManifest, + integration_mod.RuntimeArtifactManifest, "from_bytes", lambda seen: SimpleNamespace( serving_manifest_ref=f"manifest:{seen!r}", @@ -1956,10 +1977,10 @@ def test_serving_integration_prepares_manifest_carrier_result(monkeypatch): def test_serving_integration_builds_local_ready_binding_contract(monkeypatch): - integration = ServingIntegration() + integration = ArtifactRuntimeIntegration() monkeypatch.setattr( - integration_mod, - "compute_serving_binding_tensor_schema_hash", + local_ready_mod, + "compute_runtime_binding_tensor_schema_hash", lambda *_args, **_kwargs: "schema-hash", ) recipe = SimpleNamespace( @@ -1998,7 +2019,7 @@ def test_serving_integration_builds_local_ready_binding_contract(monkeypatch): def test_serving_integration_owns_local_ready_recipe_fields(): 
- integration = ServingIntegration() + integration = ArtifactRuntimeIntegration() recipe = SimpleNamespace( trace_plan=SimpleNamespace( copy_plan=(1, 2), @@ -2012,7 +2033,7 @@ def test_serving_integration_owns_local_ready_recipe_fields(): realization_fallback_plan=(1,), source_artifact_ref="mi2:test:source", source_metadata_fingerprint="meta-fingerprint", - serving_facts=SimpleNamespace( + runtime_facts=SimpleNamespace( process_after_load_class=FinalizeClass.REPRESENTATION_CHANGING ), ) @@ -2165,7 +2186,7 @@ def support_level(self, model, model_config): support_level = getattr(self.adapter, "support_level", None) if callable(support_level): return support_level(model, model_config) - return ServingSupportLevel.RUNTIME_BIND_SWAP_READY + return RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY def process_after_load_class(self, model, model_config): process_after_load = getattr(self.adapter, "process_after_load_class", None) @@ -2268,8 +2289,8 @@ def close(self): self.closed = True -def _member() -> ServingBindingMemberRef: - return ServingBindingMemberRef( +def _member() -> RuntimeBindingMemberRef: + return RuntimeBindingMemberRef( member_id="member-0", member_index=0, member_count=1, @@ -2286,7 +2307,7 @@ def _binding_ref() -> BindingValueRef: ) -def _authority() -> ParsedRetainedServingBindingAuthority: +def _authority() -> ParsedRetainedRealizationAuthority: member = _member() binding_ref = _binding_ref() capability = BindingReservationCapability( @@ -2299,7 +2320,7 @@ def _authority() -> ParsedRetainedServingBindingAuthority: reservation_bytes=4096, scope_digest="scope-1", ) - return ParsedRetainedServingBindingAuthority( + return ParsedRetainedRealizationAuthority( group_id="group-1", local_serving_ref="binding-local:binding-1:value-1", binding_value_ref=binding_ref, @@ -2309,23 +2330,23 @@ def _authority() -> ParsedRetainedServingBindingAuthority: device_uuid="gpu-0", member=member, reservation_bytes=4096, - expected=RetainedServingBindingExpectedDigests( + 
expected=RetainedRealizationExpectedDigests( target_layout_hash="layout-hash", tensor_schema_hash="schema-hash", - serving_build_digest="build-digest", + runtime_build_digest="build-digest", resolved_spec_digest="spec-digest", ), - readiness="serving_local_ready", + readiness="runtime_local_ready", verification_state="local_only", ) def test_framework_boundary_reexports_serving_identity_types(): assert IntegrationBindingValueRef is BindingValueRef - assert IntegrationServingBindingMemberRef is ServingBindingMemberRef + assert IntegrationRuntimeBindingMemberRef is RuntimeBindingMemberRef assert SERVING_MANIFEST_TENSOR_NAME.startswith("__tensorcast_meta__.") assert FinalizeClass.RUNTIME_ONLY.value == "runtime_only" - assert ServingSupportLevel.RUNTIME_BIND_SWAP_READY.value + assert RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY.value def test_retained_binding_authority_uses_parsed_retained_authority(): @@ -2339,9 +2360,9 @@ def test_retained_binding_authority_uses_parsed_retained_authority(): assert parsed.member.group_id == "group-1" assert parsed.expected.target_layout_hash == "layout-hash" assert parsed.expected.tensor_schema_hash == "schema-hash" - assert parsed.expected.serving_build_digest == "build-digest" + assert parsed.expected.runtime_build_digest == "build-digest" assert parsed.expected.resolved_spec_digest == "spec-digest" - assert parsed.readiness == "serving_local_ready" + assert parsed.readiness == "runtime_local_ready" assert parsed.local_serving_ref == "binding-local:binding-1:value-1" @@ -2359,8 +2380,8 @@ def test_serving_integration_p15_request_contract_smoke(): state.close() assert closed == ["binding"] - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -2369,13 +2390,13 @@ def test_serving_integration_p15_request_contract_smoke(): identity_payload={"rank": 0}, ) adapter = 
_MaterializationAdapter() - integration = ServingIntegration( + integration = ArtifactRuntimeIntegration( host=_host_for_adapter(adapter), profile_sink=lambda _event: None, ) assert integration.host.framework.identity(None).framework_name == "fakefw" - identity = ServingBindingPlan( + identity = RuntimeBindingPlan( model_hash="hash", model_id="fake-model", model_revision=None, @@ -2404,13 +2425,13 @@ def test_serving_integration_p15_request_contract_smoke(): ), ) for request, method in request_and_method: - with pytest.raises(ServingIntegrationNotImplementedError): + with pytest.raises(ArtifactRuntimeNotImplementedError): method(request) def test_serving_integration_builds_recipe_session_identity_from_request(): - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -2424,7 +2445,7 @@ def test_serving_integration_builds_recipe_session_identity_from_request(): adapter_version=lambda: "adapter-v1", serving_abi_version=lambda _model_config: "abi-v1", ) - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) model_config = SimpleNamespace( model="fake-model", revision="rev-a", @@ -2454,7 +2475,7 @@ def test_serving_integration_builds_recipe_session_identity_from_request(): def test_serving_integration_load_and_reload_use_materialization(): adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) resolved = SimpleNamespace( artifact=_Artifact(), artifact_ref="mi2:test:serving-a", @@ -2465,8 +2486,8 @@ def test_serving_integration_load_and_reload_use_materialization(): ), ) - load_result = integration._load_existing_serving_artifact( - _DirectServingLoad( + load_result = integration._load_existing_runtime_artifact( + 
_DirectRuntimeLoad( resolved_artifact=resolved, framework_config=SimpleNamespace(name="framework"), model_config=SimpleNamespace(name="model"), @@ -2474,12 +2495,12 @@ def test_serving_integration_load_and_reload_use_materialization(): ) ) - assert isinstance(load_result, ServingLoadResult) + assert isinstance(load_result, RuntimeLoadResult) assert isinstance(load_result.runtime_state, RuntimeBindingState) assert load_result.runtime_view.serving_artifact_ref == "mi2:test:serving-a" assert load_result.runtime_view.source_artifact_ref == "mi2:test:source" assert load_result.runtime_view.representation_contract_hash == "repr-a" - assert load_result.runtime_view.readiness == "serving" + assert load_result.runtime_view.readiness == "runtime_ready" load_report = load_result.runtime_view.diagnostics["artifact_realization_report"] assert load_report["target_kind"] == "runtime_attachment" assert load_report["artifact_id"] == "mi2:test:serving-a" @@ -2503,8 +2524,8 @@ def test_serving_integration_load_and_reload_use_materialization(): serving_build_digest="build-b", ), ) - reload_result = integration._reload_existing_serving_artifact( - _ServingReload( + reload_result = integration._reload_existing_runtime_artifact( + _RuntimeReload( current_state=load_result.runtime_state, resolved_artifact=next_resolved, model=load_result.model, @@ -2513,7 +2534,7 @@ def test_serving_integration_load_and_reload_use_materialization(): ) ) - assert isinstance(reload_result, ServingReloadResult) + assert isinstance(reload_result, RuntimeReloadResult) assert reload_result.runtime_view.serving_artifact_ref == "mi2:test:serving-b" assert reload_result.runtime_view.representation_contract_hash == "repr-b" reload_report = reload_result.runtime_view.diagnostics[ @@ -2566,14 +2587,14 @@ def fake_build_options(self, **kwargs): return "bind-options", {"profile": True} monkeypatch.setattr( - ServingIntegration, "build_materialization_options", fake_build_options + ArtifactRuntimeIntegration, 
"build_materialization_options", fake_build_options ) - result = ServingIntegration( + result = ArtifactRuntimeIntegration( resolver=_Resolver(), host=_host_for_adapter(adapter), - )._load_existing_serving_artifact( - _DirectServingLoad( + )._load_existing_runtime_artifact( + _DirectRuntimeLoad( artifact_ref="mi2:test:serving", target_device=torch.device("cpu"), configured_collective_policy="collective-policy", @@ -2595,14 +2616,14 @@ def fake_build_options(self, **kwargs): resolved, { "expected_tensor_schema_hash": result.runtime_view.tensor_schema_hash, - "serving_runtime_policy": "manifest-policy", + "runtime_artifact_policy": "manifest-policy", }, ), ( "options", { "artifact_ref": "mi2:test:serving", - "operation_scope": "startup.direct_serving_artifact.bind", + "operation_scope": "startup.direct_runtime_artifact.bind", "configured_policy": "collective-policy", "source_bound_contract_state": SimpleNamespace( source_bound_contract_ready=True @@ -2641,15 +2662,15 @@ def cross_check(self, resolved_artifact, **kwargs): return resolved_artifact binding = _Bound() - integration = ServingIntegration(resolver=_Resolver()) + integration = ArtifactRuntimeIntegration(resolver=_Resolver()) current_state = RuntimeBindingState( binding=binding, artifact_ref="mi2:test:serving-current", runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"), ) - result = integration._reload_existing_serving_artifact( - _ServingReload( + result = integration._reload_existing_runtime_artifact( + _RuntimeReload( current_state=current_state, artifact_ref="mi2:test:serving-next", target_device=torch.device("cpu"), @@ -2667,7 +2688,7 @@ def cross_check(self, resolved_artifact, **kwargs): resolved, { "expected_tensor_schema_hash": "schema-hash", - "serving_runtime_policy": "manifest-policy", + "runtime_artifact_policy": "manifest-policy", }, ), ] @@ -2692,14 +2713,14 @@ def resolve(self, artifact_ref): calls.append(("resolve", artifact_ref)) return resolved - member = 
ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id="dp0:pp0:tp1", member_index=1, member_count=2, group_id="group-1", ) - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( group_id="group-1", schema_topology_digest="topology-digest", logical_topology_ref="fake://topology", @@ -2718,14 +2739,12 @@ def resolve_key_mapping_cached(self, *, key): "tensorcast.api.store.get_runtime_context", lambda: _RuntimeContext() ) - result = ServingIntegration(resolver=_Resolver())._resolved_artifact( + result = ArtifactRuntimeIntegration(resolver=_Resolver())._resolved_artifact( resolved_artifact=None, artifact_ref=None, - artifact_locator=ServingArtifactLocator.ranked_version_key( - "models/demo/serving/v1" - ), + artifact_locator=ArtifactLocator.ranked_version_key("models/demo/serving/v1"), expected_tensor_schema_hash=None, - serving_runtime_policy=None, + runtime_artifact_policy=None, placement=placement, ) @@ -2744,12 +2763,12 @@ def test_serving_integration_rejects_resolved_artifact_ref_mismatch(): ) with pytest.raises(ManifestMismatchError, match="artifact ref mismatch"): - ServingIntegration()._resolved_artifact( + ArtifactRuntimeIntegration()._resolved_artifact( resolved_artifact=resolved, artifact_ref="mi2:test:serving-rank-1", artifact_locator=None, expected_tensor_schema_hash=None, - serving_runtime_policy=None, + runtime_artifact_policy=None, ) @@ -2768,12 +2787,12 @@ def test_serving_integration_accepts_matching_topology_digest_and_logical_topolo ) assert ( - ServingIntegration()._resolved_artifact( + ArtifactRuntimeIntegration()._resolved_artifact( resolved_artifact=resolved, artifact_ref="mi2:test:serving-rank-0", artifact_locator=None, expected_tensor_schema_hash=None, - serving_runtime_policy=None, + runtime_artifact_policy=None, placement=placement, ) is resolved @@ -2812,12 +2831,12 @@ def test_serving_integration_rejects_topology_mismatch_matrix( ) with 
pytest.raises(ManifestMismatchError, match=match): - ServingIntegration()._resolved_artifact( + ArtifactRuntimeIntegration()._resolved_artifact( resolved_artifact=resolved, artifact_ref="mi2:test:serving-rank-0", artifact_locator=None, expected_tensor_schema_hash=None, - serving_runtime_policy=None, + runtime_artifact_policy=None, placement=current_placement, ) @@ -2837,12 +2856,12 @@ def test_serving_integration_rejects_logical_topology_mismatch_without_digest(): ) with pytest.raises(ManifestMismatchError, match="logical topology mismatch"): - ServingIntegration()._resolved_artifact( + ArtifactRuntimeIntegration()._resolved_artifact( resolved_artifact=resolved, artifact_ref="mi2:test:serving-rank-0", artifact_locator=None, expected_tensor_schema_hash=None, - serving_runtime_policy=None, + runtime_artifact_policy=None, placement=current_placement, ) @@ -2871,7 +2890,7 @@ def fake_build_options(self, **kwargs): return "swap-options", {"profile": True} monkeypatch.setattr( - ServingIntegration, "build_materialization_options", fake_build_options + ArtifactRuntimeIntegration, "build_materialization_options", fake_build_options ) binding = _Bound() current_state = RuntimeBindingState( @@ -2880,8 +2899,8 @@ def fake_build_options(self, **kwargs): runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"), ) - ServingIntegration(resolver=_Resolver())._reload_existing_serving_artifact( - _ServingReload( + ArtifactRuntimeIntegration(resolver=_Resolver())._reload_existing_runtime_artifact( + _RuntimeReload( current_state=current_state, artifact_ref="mi2:test:serving-next", target_device=torch.device("cpu"), @@ -2916,16 +2935,16 @@ def fake_build_options(self, **kwargs): def test_serving_integration_reload_rejects_non_swap_capable_binding(): - integration = ServingIntegration() + integration = ArtifactRuntimeIntegration() current_state = RuntimeBindingState( binding=SimpleNamespace(), artifact_ref="mi2:test:serving-current", 
runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"), ) - with pytest.raises(ServingIntegrationError, match="swap-capable"): - integration._reload_existing_serving_artifact( - _ServingReload( + with pytest.raises(ArtifactRuntimeIntegrationError, match="swap-capable"): + integration._reload_existing_runtime_artifact( + _RuntimeReload( current_state=current_state, artifact_ref="mi2:test:serving-next", target_device=torch.device("cpu"), @@ -2959,7 +2978,7 @@ def cross_check(self, resolved_artifact, **_kwargs): return resolved_artifact monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "build_materialization_options", lambda self, **_kwargs: ("swap-options", {}), ) @@ -2975,7 +2994,7 @@ def cross_check(self, resolved_artifact, **_kwargs): runtime_view=RuntimeBindingView(tensor_schema_hash="schema-hash"), ) - attachment = ServingIntegration( + attachment = ArtifactRuntimeIntegration( resolver=_Resolver(), host=IntegrationHost( framework=_ContractFrameworkHost(), @@ -2984,9 +3003,7 @@ def cross_check(self, resolved_artifact, **_kwargs): ), ).reload( current_state, - ExistingServingArtifact( - ServingArtifactLocator.artifact_ref("mi2:test:serving-next") - ), + ExistingRuntimeArtifact(ArtifactLocator.artifact_ref("mi2:test:serving-next")), RequestContext(model_config=SimpleNamespace(model="fake")), model=_Model(), ) @@ -2997,7 +3014,7 @@ def cross_check(self, resolved_artifact, **_kwargs): def test_serving_integration_load_prepared_local_ready_uses_restore(monkeypatch): adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) binding_ref = _binding_ref() class _PreparedRestored: @@ -3039,8 +3056,8 @@ def fake_restore_prepared(**kwargs): ), ) - result = integration._load_existing_serving_artifact( - _DirectServingLoad( + result = integration._load_existing_runtime_artifact( + _DirectRuntimeLoad( 
resolved_artifact=resolved, model_config=SimpleNamespace(name="model"), target_device=torch.device("cpu"), @@ -3048,7 +3065,7 @@ def fake_restore_prepared(**kwargs): ) ) - assert result.runtime_view.readiness == "serving_local_ready" + assert result.runtime_view.readiness == "runtime_local_ready" assert result.runtime_view.binding_value_ref == binding_ref assert torch.equal(result.model.w.detach(), torch.full((1,), 3.0)) assert restored.transferred @@ -3056,14 +3073,14 @@ def fake_restore_prepared(**kwargs): def test_serving_integration_error_taxonomy_is_structured(): - assert issubclass(ManifestMismatchError, ServingIntegrationError) - assert issubclass(SchemaMismatchError, ServingIntegrationError) - assert issubclass(AdmissionRejectedError, ServingIntegrationError) - assert issubclass(AuthorityValidationError, ServingIntegrationError) - assert issubclass(CapabilityMissingError, ServingIntegrationError) - assert issubclass(PlacementAdmissionError, ServingIntegrationError) - assert issubclass(ArtifactLocatorResolutionError, ServingIntegrationError) - assert issubclass(SourceProviderError, ServingIntegrationError) + assert issubclass(ManifestMismatchError, ArtifactRuntimeIntegrationError) + assert issubclass(SchemaMismatchError, ArtifactRuntimeIntegrationError) + assert issubclass(AdmissionRejectedError, ArtifactRuntimeIntegrationError) + assert issubclass(AuthorityValidationError, ArtifactRuntimeIntegrationError) + assert issubclass(CapabilityMissingError, ArtifactRuntimeIntegrationError) + assert issubclass(PlacementAdmissionError, ArtifactRuntimeIntegrationError) + assert issubclass(ArtifactLocatorResolutionError, ArtifactRuntimeIntegrationError) + assert issubclass(SourceProviderError, ArtifactRuntimeIntegrationError) error = SchemaMismatchError( "bad schema", operation="reload", @@ -3077,98 +3094,66 @@ def test_serving_integration_error_taxonomy_is_structured(): def test_public_runtime_package_boundary_hides_admin_helpers(): - import tensorcast.serving as 
serving - import tensorcast.serving.admin as serving_admin - import tensorcast.serving.hosts as serving_hosts - import tensorcast.serving.policy as serving_policy - import tensorcast.serving.runtime as serving_runtime - from tensorcast.serving.testing import ( + import tensorcast as tc + import tensorcast.artifact_runtime.admin as runtime_admin + import tensorcast.artifact_runtime.host as runtime_host + from tensorcast.artifact_runtime.testing import ( assert_framework_isolation, - assert_public_runtime_boundary, - ) - - assert serving_runtime.ServingRuntimeSession is ServingRuntimeSession - assert serving_runtime.ServingConfig is ServingConfig - assert serving_runtime.ServingArtifactLocator is ServingArtifactLocator - assert ServingArtifactLocator is serving_policy.ServingArtifactLocator - assert serving_runtime.ServingPolicy is serving_policy.ServingPolicy - assert integration_mod.ServingPolicy is serving_policy.ServingPolicy - assert "ServingRuntimeSession" in serving_runtime.__all__ - assert "FrameworkAdapter" not in serving.__all__ + assert_public_artifact_runtime_boundary, + ) + + assert tc.TensorCastRuntimeConfig is TensorCastRuntimeConfig + assert tc.plan_runtime_start is integration_mod.tc_runtime_config.plan_runtime_start + assert "ServingConfig" not in tc.__all__ + assert "plan_serving_start" not in tc.__all__ + assert not hasattr(tc, "ServingConfig") + assert not hasattr(tc, "plan_serving_start") + assert not hasattr(integration_mod, "ServingPolicy") + assert _find_spec_or_none("tensorcast.serving") is None + assert _find_spec_or_none("tensorcast.serving.runtime") is None + assert _find_spec_or_none("tensorcast.serving.config") is None + assert _find_spec_or_none("tensorcast.serving.contract") is None + assert _find_spec_or_none("tensorcast.serving.hosts") is None + assert _find_spec_or_none("tensorcast.serving.policy") is None + assert _find_spec_or_none("tensorcast.serving.runtime_contract") is None + assert "ServingArtifactLocator" not in tc.__all__ 
+ assert "ServingPolicy" not in tc.__all__ + assert not hasattr(tc, "ServingArtifactLocator") + assert not hasattr(tc, "ServingPolicy") + assert "ArtifactRuntimeSession" not in tc.__all__ + assert not hasattr(tc, "ArtifactRuntimeSession") assert not hasattr(integration_mod, "FrameworkAdapter") - assert not hasattr(ServingIntegration, "framework_adapter") - assert "AdminLocalSourceBootstrap" not in serving_runtime.__all__ - assert "_AdminLocalSourceBootstrap" not in serving_runtime.__all__ - assert "bind_serving_artifact" not in serving_runtime.__all__ - assert not hasattr(ServingIntegration, "bind") - assert not hasattr(ServingIntegration, "swap") - assert not hasattr(ServingIntegration, "restore_retained") - assert not hasattr(ServingIntegration, "restore_prepared_local_ready") - assert serving_admin.AdminLocalSourceBootstrap is AdminLocalSourceBootstrap - assert serving_hosts.IntegrationHost is IntegrationHost - assert serving_hosts.SourceHost is integration_mod.SourceHost - assert serving_hosts.RecipeCachePolicy is RecipeCachePolicy - assert serving.PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION == ( + assert not hasattr(ArtifactRuntimeIntegration, "framework_adapter") + assert "AdminLocalSourceBootstrap" not in tc.__all__ + assert "_AdminLocalSourceBootstrap" not in tc.__all__ + assert "bind_runtime_artifact" not in tc.__all__ + assert not hasattr(ArtifactRuntimeIntegration, "bind") + assert not hasattr(ArtifactRuntimeIntegration, "swap") + assert not hasattr(ArtifactRuntimeIntegration, "restore_retained") + assert not hasattr(ArtifactRuntimeIntegration, "restore_prepared_local_ready") + assert runtime_admin.AdminLocalSourceBootstrap is AdminLocalSourceBootstrap + assert runtime_host.RuntimeHostCapabilities is IntegrationHost + assert runtime_host.IntegrationHost is IntegrationHost + assert runtime_host.SourceHost is integration_mod.SourceHost + assert runtime_host.RecipeCachePolicy is RecipeCachePolicy + assert runtime_host.PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION == 
( PLACEMENT_IDENTITY_FACTS_SCHEMA_VERSION ) - assert serving_hosts.PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION == ( + assert runtime_host.PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION == ( PLACEMENT_ADMISSION_FACTS_SCHEMA_VERSION ) - assert serving_hosts.SOURCE_CATALOG_REQUEST_SCHEMA_VERSION == ( + assert runtime_host.SOURCE_CATALOG_REQUEST_SCHEMA_VERSION == ( SOURCE_CATALOG_REQUEST_SCHEMA_VERSION ) - assert serving.SOURCE_CATALOG_SCHEMA_VERSION == SOURCE_CATALOG_SCHEMA_VERSION - assert serving_hosts.SOURCE_CATALOG_SCHEMA_VERSION == ( - SOURCE_CATALOG_SCHEMA_VERSION - ) - - assert_public_runtime_boundary(serving_runtime) - assert_framework_isolation( - ("tensorcast.serving.runtime", "tensorcast.serving.hosts") - ) - - -def test_serving_root_facade_is_explicit_and_curated(): - import tensorcast.serving as serving - - hidden_names = { - "bind_serving_artifact", - "swap_serving_artifact", - "restore_retained_binding", - "build_materialization_execution_context", - "compile_recipe_from_inputs", - "load_compiled_recipe_cache", - "write_compiled_recipe_cache", - "materialize_recipe_copy_plan_tensors", - "complete_pure_transform_recipe_publication", - "build_binding_finalize_build_intent", - "build_pure_transform_build_intent", - "RecipeBuildSession", - "RecipePublicationContext", - "CompiledServingRecipe", - "PublishedReplicaProjection", - "ReloadResponseProjection", - "RuntimeEndpointProjection", - "SourceSelectionProjection", - "WeightVersionProjection", - "RuntimeAttachmentStore", - "RuntimeAttachmentRecord", - "ModelAttributeRuntimeState", - "ReadinessInventoryAdmissionPolicy", - "aggregate_runtime_view_outputs", - "publication_aggregate", - } - assert hidden_names.isdisjoint(serving.__all__) - assert "ServingRuntimeSession" not in serving.__all__ - assert "IntegrationHost" in serving.__all__ - assert "ConformanceResult" not in serving.__all__ - assert "ServingConfig" in serving.__all__ - assert "RuntimeSettings" in serving.__all__ - assert not hasattr(serving, 
"__getattr__") + assert runtime_host.SOURCE_CATALOG_SCHEMA_VERSION == (SOURCE_CATALOG_SCHEMA_VERSION) + + assert_public_artifact_runtime_boundary(tc) + assert_framework_isolation(("tensorcast", "tensorcast.artifact_runtime.host")) - hidden_name = "CompiledServingRecipe" - with pytest.raises(AttributeError): - getattr(serving, hidden_name) + +def test_serving_public_package_is_removed(): + assert _find_spec_or_none("tensorcast.serving") is None + assert _find_spec_or_none("tensorcast.serving.runtime") is None def test_source_subject_broadcast_round_trips_non_public_subjects(): @@ -3198,7 +3183,7 @@ def test_source_subject_broadcast_round_trips_non_public_subjects(): "metadata_fingerprint": "meta", } - integration = ServingIntegration() + integration = ArtifactRuntimeIntegration() payload = integration.source_subject_broadcast_payload(subject) assert integration.source_subject_from_broadcast_payload(payload) == restored @@ -3234,7 +3219,7 @@ def broadcast_object(payload, *, src): calls.append((payload, src)) return payload - resolved = ServingIntegration().resolve_source_subject( + resolved = ArtifactRuntimeIntegration().resolve_source_subject( SourceSelector.local_path("/tmp/model"), verify_checksums=True, coordinator=_Coordinator(), @@ -3290,7 +3275,7 @@ def test_runtime_binding_materialization_attaches_and_transfers_ownership( binding_handle=binding, target_device=torch.device("cpu"), tensor_schema_hash="schema", - artifact_profile="serving_artifact", + artifact_profile="runtime_artifact", authority_scope="daemon_mediated_runtime_attachment", ), ), @@ -3447,7 +3432,7 @@ def _local_ready_recipe() -> SimpleNamespace: def _representation_changing_local_ready_recipe() -> SimpleNamespace: recipe = _local_ready_recipe() - recipe.serving_facts = SimpleNamespace( + recipe.runtime_facts = SimpleNamespace( process_after_load_class=FinalizeClass.REPRESENTATION_CHANGING ) recipe.semantic_validation_spec = TensorcastSemanticValidationSpec( @@ -3483,7 +3468,7 @@ def 
_local_ready_finalize_request(**overrides) -> _LocalReadyFinalize: def test_serving_integration_finalizes_local_ready_runtime_in_core(): adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) model = _MaterializedModel() model_config = SimpleNamespace(name="model-config") binding = _LocalReadyBinding() @@ -3519,7 +3504,7 @@ def test_serving_integration_finalizes_local_ready_runtime_in_core(): assert result.recipe is recipe assert result.binding is binding assert result.current_value.local_serving_ref == ("binding-local:binding-1:value-1") - assert result.runtime_view.readiness == "serving_local_ready" + assert result.runtime_view.readiness == "runtime_local_ready" assert result.runtime_view.source_artifact_ref == "mi2:test:source" report = result.runtime_view.diagnostics["artifact_realization_report"] assert report["target_kind"] == "runtime_attachment" @@ -3550,14 +3535,14 @@ def test_serving_integration_finalizes_local_ready_runtime_in_core(): def test_serving_integration_validates_local_ready_representation_contract(monkeypatch): calls = [] adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) model = _MaterializedModel() model_config = SimpleNamespace( model="fake-model", compute_hash=lambda: "model-hash", ) - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -3567,7 +3552,7 @@ def test_serving_integration_validates_local_ready_representation_contract(monke ) monkeypatch.setattr( - integration_mod, + contract_mod, "compute_runtime_representation_contract_hash", lambda **kwargs: calls.append(kwargs) or "repr", ) @@ -3620,8 +3605,8 @@ def 
test_serving_integration_closes_local_ready_binding_on_representation_drift( ): binding = _LocalReadyBinding() model_config = SimpleNamespace(model="fake-model") - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -3630,14 +3615,14 @@ def test_serving_integration_closes_local_ready_binding_on_representation_drift( identity_payload={"rank": 0}, ) monkeypatch.setattr( - integration_mod, + contract_mod, "compute_runtime_representation_contract_hash", lambda **_kwargs: "actual", ) with pytest.raises(ManifestMismatchError, match="contract hash drifted"): adapter = _MaterializationAdapter() - ServingIntegration( + ArtifactRuntimeIntegration( host=_host_for_adapter(adapter), )._finalize_local_ready_runtime( _LocalReadyFinalize( @@ -3671,8 +3656,10 @@ def test_serving_integration_closes_local_ready_binding_on_representation_drift( def test_serving_integration_rejects_representation_changing_finalize_without_semantic_validation(): binding = _LocalReadyBinding() - with pytest.raises(ServingIntegrationError, match="explicit semantic validation"): - ServingIntegration( + with pytest.raises( + ArtifactRuntimeIntegrationError, match="explicit semantic validation" + ): + ArtifactRuntimeIntegration( host=_host_for_adapter(_MaterializationAdapter()) )._finalize_local_ready_runtime( _local_ready_finalize_request( @@ -3690,10 +3677,10 @@ def test_serving_integration_rejects_representation_changing_finalize_without_co binding = _LocalReadyBinding() with pytest.raises( - ServingIntegrationError, + ArtifactRuntimeIntegrationError, match="requires representation contract validation", ): - ServingIntegration( + ArtifactRuntimeIntegration( host=_host_for_adapter(_MaterializationAdapter()) )._finalize_local_ready_runtime( _local_ready_finalize_request( @@ -3711,8 +3698,8 @@ def 
test_serving_integration_rejects_representation_changing_finalize_without_re monkeypatch, ): binding = _LocalReadyBinding() - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -3721,16 +3708,16 @@ def test_serving_integration_rejects_representation_changing_finalize_without_re identity_payload={"rank": 0}, ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "local_ready_representation_contract_hash", lambda _self, **_kwargs: "repr", ) with pytest.raises( - ServingIntegrationError, + ArtifactRuntimeIntegrationError, match="ready same-binding contract proof", ): - ServingIntegration( + ArtifactRuntimeIntegration( host=_host_for_adapter(_MaterializationAdapter()) )._finalize_local_ready_runtime( _local_ready_finalize_request( @@ -3759,7 +3746,7 @@ def test_serving_integration_prepare_local_ready_owns_contract_and_options(monke adapter.align_runtime_tensor_names = ( lambda model, names: align_calls.append(tuple(names)) or 0 ) - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) calls = [] source_bound_contract_state = SimpleNamespace( source_bound_contract_ready=True, @@ -3781,9 +3768,11 @@ def fake_prepare(**kwargs): ) monkeypatch.setattr( - ServingIntegration, "build_materialization_options", fake_build_options + ArtifactRuntimeIntegration, "build_materialization_options", fake_build_options + ) + monkeypatch.setattr( + local_ready_mod, "realize_local_ready_binding_from_source", fake_prepare ) - monkeypatch.setattr(integration_mod, "prepare_local_ready_serving", fake_prepare) result = integration._prepare_local_source_bootstrap( _LocalReadyBootstrap( @@ -3848,7 +3837,7 @@ def runtime_only_tensor_names(self, model): del model return ("runtime_only",) - integration = ServingIntegration( + integration = 
ArtifactRuntimeIntegration( host=_host_for_adapter( adapter, placement=_PlacementWithExecutionFacts(), @@ -3863,13 +3852,13 @@ def runtime_only_tensor_names(self, model): ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "build_materialization_options", lambda self, **kwargs: calls.append(kwargs) or ("realize-options", {}), ) monkeypatch.setattr( - integration_mod, - "prepare_local_ready_serving", + local_ready_mod, + "realize_local_ready_binding_from_source", lambda **kwargs: SimpleNamespace( binding=_LocalReadyBinding(), update_epoch="epoch-1", @@ -3945,25 +3934,25 @@ def build_recipe(self, **kwargs): ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "resolve_source_subject", lambda self, selector, **kwargs: calls.append(("resolve", selector, kwargs)) or source_subject, ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "build_recipe_session", lambda self, request: calls.append(("session", request)) or _Session(), ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "build_materialization_options", lambda self, **kwargs: calls.append(("options", kwargs)) or ("realize-options", {}), ) monkeypatch.setattr( - integration_mod, - "prepare_local_ready_serving", + local_ready_mod, + "realize_local_ready_binding_from_source", lambda **kwargs: calls.append(("prepare", kwargs)) or SimpleNamespace( binding=_LocalReadyBinding(), @@ -3973,7 +3962,7 @@ def build_recipe(self, **kwargs): ), ) - result = ServingIntegration( + result = ArtifactRuntimeIntegration( host=IntegrationHost( framework=_ContractFrameworkHost(), placement=_ContractPlacementHost(), @@ -4034,7 +4023,7 @@ def build_recipe(self, **kwargs): def test_serving_integration_prepare_local_ready_builds_framework_context(monkeypatch): adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) calls = [] 
source_bound_contract_state = SimpleNamespace( source_bound_contract_ready=True, @@ -4043,8 +4032,8 @@ def test_serving_integration_prepare_local_ready_builds_framework_context(monkey ) recipe = _representation_changing_local_ready_recipe() model_config = SimpleNamespace(name="model-config") - placement = ServingPlacement( - topology=ServingTopologyRef( + placement = RuntimePlacement( + topology=RuntimeTopologyRef( schema_topology_digest="digest", logical_topology_ref="fake://topology", ), @@ -4063,7 +4052,7 @@ def fake_prepare(**kwargs): ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "prepare_local_ready_manifest_carrier_from_framework_context", lambda self, **kwargs: calls.append(("carrier", kwargs)) or LocalReadyManifestCarrierResult( @@ -4074,16 +4063,18 @@ def fake_prepare(**kwargs): ), ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "build_materialization_options", lambda self, **kwargs: ("realize-options", {}), ) monkeypatch.setattr( - ServingIntegration, + ArtifactRuntimeIntegration, "local_ready_representation_contract_hash", lambda self, **kwargs: "repr", ) - monkeypatch.setattr(integration_mod, "prepare_local_ready_serving", fake_prepare) + monkeypatch.setattr( + local_ready_mod, "realize_local_ready_binding_from_source", fake_prepare + ) result = integration._prepare_local_source_bootstrap( _LocalReadyBootstrap( @@ -4125,7 +4116,7 @@ def fake_prepare(**kwargs): def test_serving_integration_finalizes_local_ready_runtime_runs_semantic_validation(): adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) model_config = SimpleNamespace(name="model-config") result = integration._finalize_local_ready_runtime( @@ -4167,7 +4158,7 @@ def test_serving_integration_finalizes_local_ready_runtime_closes_on_error(): with pytest.raises(SchemaMismatchError, match="tensor set"): adapter = 
_MaterializationAdapter() - ServingIntegration( + ArtifactRuntimeIntegration( host=_host_for_adapter(adapter), )._finalize_local_ready_runtime( _LocalReadyFinalize( @@ -4193,7 +4184,7 @@ def test_serving_integration_finalizes_local_ready_runtime_closes_on_error(): assert binding.closed -def test_serving_integration_acquire_retained_binding_uses_materialization(): +def test_artifact_runtime_acquire_retained_binding_uses_materialization(): client = _Client() adapter = _MaterializationAdapter() adapter.compute_runtime_tensor_schema_hash = ( @@ -4202,7 +4193,7 @@ def test_serving_integration_acquire_retained_binding_uses_materialization(): adapter.allocate_runtime_only_tensors = ( lambda model, _target_device: _allocate_cpu_runtime_only(model) ) - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) model_config = SimpleNamespace(name="model-config") result = integration._restore_retained_for_intent( @@ -4221,7 +4212,7 @@ def test_serving_integration_acquire_retained_binding_uses_materialization(): assert result.model is not None assert torch.equal(result.model.w.detach(), torch.ones((1,))) assert not result.model.runtime_only.is_meta - assert result.runtime_view.readiness == "serving_local_ready" + assert result.runtime_view.readiness == "runtime_local_ready" assert result.runtime_view.tensor_schema_hash == "schema-hash" assert result.runtime_view.binding_value_ref == _binding_ref() report = result.runtime_view.diagnostics["artifact_realization_report"] @@ -4246,16 +4237,16 @@ def test_serving_integration_acquire_retained_binding_uses_materialization(): assert result.runtime_state.release_contract.released is True -def test_serving_integration_acquire_retained_binding_rejects_published_ready(): +def test_artifact_runtime_acquire_retained_binding_rejects_published_ready(): authority = _authority() - authority = ParsedRetainedServingBindingAuthority( + authority = 
ParsedRetainedRealizationAuthority( **{ **authority.__dict__, - "readiness": "serving_published_ready", + "readiness": "runtime_published_ready", } ) adapter = _MaterializationAdapter() - integration = ServingIntegration(host=_host_for_adapter(adapter)) + integration = ArtifactRuntimeIntegration(host=_host_for_adapter(adapter)) with pytest.raises(RestoreBindingError, match="swap-capable"): integration._restore_retained_for_intent( @@ -4302,11 +4293,11 @@ def ensure_client(self): def test_bind_and_swap_return_attach_ready_results(): resolved = SimpleNamespace(artifact=_Artifact(), tensor_names=("w",)) - result = bind_serving_artifact( + result = bind_runtime_artifact( resolved_artifact=resolved, tensor_names=("w",), device=torch.device("cuda:0"), - serving_runtime_policy=None, + runtime_artifact_policy=None, options=None, ) @@ -4320,10 +4311,10 @@ def swap(self, artifact, **kwargs): binding = _SwapBinding() binding.tensors[SERVING_MANIFEST_TENSOR_NAME] = torch.ones((1,), dtype=torch.uint8) - swap_result = swap_serving_artifact( + swap_result = swap_runtime_artifact( binding=binding, resolved_artifact=resolved, - serving_runtime_policy="policy", + runtime_artifact_policy="policy", options="options", ) @@ -4331,7 +4322,7 @@ def swap(self, artifact, **kwargs): assert isinstance(binding.swapped[0], _Subset) assert binding.swapped[0].names == ("w", SERVING_MANIFEST_TENSOR_NAME) assert binding.swapped[1] == { - "serving_runtime_policy": "policy", + "runtime_artifact_policy": "policy", "options": "options", } @@ -4369,7 +4360,7 @@ def test_restore_retained_binding_keeps_runtime_owned_attachment(): def test_restore_retained_binding_rejects_member_mismatch(): - expected_member = ServingBindingMemberRef( + expected_member = RuntimeBindingMemberRef( member_id="other-member", member_index=0, member_count=1, diff --git a/tests/python/test_serving_readiness.py b/tests/python/artifact_runtime/test_readiness.py similarity index 87% rename from tests/python/test_serving_readiness.py 
rename to tests/python/artifact_runtime/test_readiness.py index ca0dceb7..26f25df1 100644 --- a/tests/python/test_serving_readiness.py +++ b/tests/python/artifact_runtime/test_readiness.py @@ -2,21 +2,21 @@ from types import SimpleNamespace -from tensorcast.serving._runtime_impl.lifecycle import ( +from tensorcast.artifact_runtime.lifecycle import ( AdmissionRequest, FrameworkIdentity, PlacementAdmissionFacts, PlacementIdentityFacts, RuntimeProfile, ) -from tensorcast.serving.readiness import ( +from tensorcast.artifact_runtime.readiness import ( ReadinessInventoryAdmissionPolicy, is_binding_finalize_publication_allowlisted, is_pure_transform_publication_allowlisted, is_runtime_bind_swap_allowlisted, - serving_support_level_display_name, + runtime_support_level_display_name, ) -from tensorcast.types import FinalizeClass, ServingSupportLevel +from tensorcast.types import FinalizeClass, RuntimeSupportLevel def _row(**overrides): @@ -24,9 +24,9 @@ def _row(**overrides): "family": "fake", "process_after_load_class": FinalizeClass.RUNTIME_ONLY, "post_bind_finalize_class": FinalizeClass.RUNTIME_ONLY, - "support_level": ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + "support_level": RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, "pure_transform_candidate": True, - "serving_only_runtime_allowed": True, + "runtime_bind_swap_allowed": True, } values.update(overrides) return SimpleNamespace(**values) @@ -36,7 +36,7 @@ def test_readiness_helpers_accept_framework_inventory_rows() -> None: row = _row() assert ( - serving_support_level_display_name(row.support_level) + runtime_support_level_display_name(row.support_level) == "runtime_bind_swap_ready" ) assert is_pure_transform_publication_allowlisted(row) is True diff --git a/tests/python/test_serving_runtime.py b/tests/python/artifact_runtime/test_runtime_config.py similarity index 98% rename from tests/python/test_serving_runtime.py rename to tests/python/artifact_runtime/test_runtime_config.py index b3b62d3d..34a050e6 100644 --- 
a/tests/python/test_serving_runtime.py +++ b/tests/python/artifact_runtime/test_runtime_config.py @@ -4,7 +4,7 @@ import pytest -from tensorcast.serving import ( +from tensorcast.artifact_runtime.config import ( DEFAULT_RUNTIME_PROFILE, RuntimeSettings, resolve_runtime_config_profile, diff --git a/tests/python/test_serving_runtime_contract.py b/tests/python/artifact_runtime/test_runtime_contract.py similarity index 97% rename from tests/python/test_serving_runtime_contract.py rename to tests/python/artifact_runtime/test_runtime_contract.py index 09c1465d..11071b15 100644 --- a/tests/python/test_serving_runtime_contract.py +++ b/tests/python/artifact_runtime/test_runtime_contract.py @@ -4,7 +4,7 @@ from types import SimpleNamespace -from tensorcast.serving.runtime_contract import ( +from tensorcast.artifact_runtime.contract import ( SourceBoundContractState, read_source_bound_contract_state, ) diff --git a/tests/python/test_serving_builder_source_catalog.py b/tests/python/artifact_runtime/test_source.py similarity index 99% rename from tests/python/test_serving_builder_source_catalog.py rename to tests/python/artifact_runtime/test_source.py index 9576e86c..a1a2ed73 100644 --- a/tests/python/test_serving_builder_source_catalog.py +++ b/tests/python/artifact_runtime/test_source.py @@ -7,7 +7,7 @@ from safetensors.torch import save_file from tensorcast.api.store.types import CanonicalIndex, CanonicalIndexEntry -from tensorcast.serving.source_catalog import ( +from tensorcast.artifact_runtime.source import ( SOURCE_CATALOG_SCHEMA_VERSION, SourceCatalog, SourceManifest, diff --git a/tests/python/test_serving_state.py b/tests/python/artifact_runtime/test_state.py similarity index 94% rename from tests/python/test_serving_state.py rename to tests/python/artifact_runtime/test_state.py index b6bc026b..3784f1ed 100644 --- a/tests/python/test_serving_state.py +++ b/tests/python/artifact_runtime/test_state.py @@ -2,18 +2,18 @@ from types import SimpleNamespace -from 
tensorcast.serving.runtime_attachment import ( +from tensorcast.artifact_runtime.attachment import ( RuntimeAttachment, RuntimeBindingState, RuntimeBindingView, ) -from tensorcast.serving.runtime_view import ( - RuntimeWorkerView, -) -from tensorcast.serving.state import ( +from tensorcast.artifact_runtime.state import ( ModelAttributeRuntimeState, RuntimeAttachmentStore, ) +from tensorcast.artifact_runtime.view import ( + RuntimeWorkerView, +) def _attachment(value_id: str) -> RuntimeAttachment: @@ -28,7 +28,7 @@ def _attachment(value_id: str) -> RuntimeAttachment: representation_contract_hash=f"repr-{value_id}", tensor_schema_hash=f"schema-{value_id}", binding_value_ref=binding_value_ref, - readiness="serving", + readiness="runtime_ready", ) return RuntimeAttachment( model=object(), diff --git a/tests/python/test_serving_runtime_view.py b/tests/python/artifact_runtime/test_view.py similarity index 97% rename from tests/python/test_serving_runtime_view.py rename to tests/python/artifact_runtime/test_view.py index eb0042bf..f3e12ac8 100644 --- a/tests/python/test_serving_runtime_view.py +++ b/tests/python/artifact_runtime/test_view.py @@ -2,7 +2,7 @@ import pytest -from tensorcast.serving.runtime_view import aggregate_runtime_view_outputs +from tensorcast.artifact_runtime.view import aggregate_runtime_view_outputs def test_runtime_view_aggregate_reports_partial_publication(): diff --git a/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py b/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py index 2b8c939d..ef78caaa 100644 --- a/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py +++ b/tests/python/daemon/test_prefetch_serving_binding_real_cuda_e2e.py @@ -19,8 +19,8 @@ from tensorcast.api._device import device_uuid_for from tensorcast.api.store.owned_binding_slot import restore_owned_binding_tensors from tensorcast.api.store.runtime import StoreRuntimeContext -from tensorcast.api.store.serving_binding_reference_consumer 
import ( - ReferenceServingTensorSpec, +from tensorcast.api.store.runtime_realization_reference_consumer import ( + ReferenceRuntimeTensorSpec, acquire_reference_binding_response, build_reference_resolved_spec, build_reference_tensor_index_bytes, @@ -32,9 +32,9 @@ from tensorcast.daemon_ctl import DaemonCtl from tensorcast.types import ( PrefetchRetentionPolicy, - ServingBindingMemberRef, - ServingBindingSetTarget, - ServingTopologyRef, + RealizationTargetSet, + RuntimeBindingMemberRef, + RuntimeTopologyRef, ) from tests.python.utils.daemon import start_daemon_binary from tests.python.utils.ports import get_free_port @@ -51,20 +51,20 @@ from tensorcast.api.store.owned_binding_slot import restore_owned_binding_tensors from tensorcast.api.store.runtime import StoreRuntimeContext -from tensorcast.api.store.serving_binding_reference_consumer import ( +from tensorcast.api.store.runtime_realization_reference_consumer import ( acquire_reference_binding_response, ) from tensorcast.daemon_ctl import DaemonCtl from tensorcast.proto.operation.v1 import operation_pb2 -from tensorcast.types import PrefetchedServingBinding, ServingBindingTarget +from tensorcast.types import PrefetchHandoff, RealizationTarget daemon_addr, target_path, prefetched_path = sys.argv[1:4] target_proto = operation_pb2.ServingBindingTarget() target_proto.ParseFromString(open(target_path, "rb").read()) prefetched_proto = operation_pb2.PrefetchServingBindingResult() prefetched_proto.ParseFromString(open(prefetched_path, "rb").read()) -target = ServingBindingTarget.from_proto(target_proto) -prefetched = PrefetchedServingBinding.from_proto(prefetched_proto) +target = RealizationTarget.from_proto(target_proto) +prefetched = PrefetchHandoff.from_proto(prefetched_proto) runtime = StoreRuntimeContext(daemon_addr) client = DaemonCtl(daemon_addr) @@ -99,20 +99,20 @@ from tensorcast.api.store.owned_binding_slot import restore_owned_binding_tensors from tensorcast.api.store.runtime import StoreRuntimeContext -from 
tensorcast.api.store.serving_binding_reference_consumer import ( +from tensorcast.api.store.runtime_realization_reference_consumer import ( acquire_reference_binding_response, ) from tensorcast.daemon_ctl import DaemonCtl from tensorcast.proto.operation.v1 import operation_pb2 -from tensorcast.types import PrefetchedServingBindingSet, ServingBindingSetTarget +from tensorcast.types import PrefetchHandoffSet, RealizationTargetSet daemon_addr, target_path, prefetched_path = sys.argv[1:4] target_proto = operation_pb2.ServingBindingSetTarget() target_proto.ParseFromString(open(target_path, "rb").read()) prefetched_proto = operation_pb2.PrefetchServingBindingSetResult() prefetched_proto.ParseFromString(open(prefetched_path, "rb").read()) -target_set = ServingBindingSetTarget.from_proto(target_proto) -prefetched_set = PrefetchedServingBindingSet.from_proto(prefetched_proto) +target_set = RealizationTargetSet.from_proto(target_proto) +prefetched_set = PrefetchHandoffSet.from_proto(prefetched_proto) targets_by_member = {target.member.member_id: target for target in target_set.members} runtime = StoreRuntimeContext(daemon_addr) @@ -152,7 +152,7 @@ def _skip_without_real_cuda() -> None: def _write_single_float_artifact(artifact_dir: Path, value: float) -> None: - tensor = ReferenceServingTensorSpec( + tensor = ReferenceRuntimeTensorSpec( name="alpha", size_bytes=4, dtype="torch.float32", @@ -248,7 +248,7 @@ def test_prefetch_serving_binding_real_cuda_worker_read_and_release(tmp_path) -> ).source assert source.artifact_id.startswith("msa1:") assert source.canonical_index_bytes == build_reference_tensor_index_bytes( - ReferenceServingTensorSpec( + ReferenceRuntimeTensorSpec( name="alpha", size_bytes=4, dtype="torch.float32", @@ -262,7 +262,7 @@ def test_prefetch_serving_binding_real_cuda_worker_read_and_release(tmp_path) -> artifact_selection_digest=source.trusted_content_artifact_id or source.artifact_id, device_uuid=device_uuid_for(0), - tensor=ReferenceServingTensorSpec( + 
tensor=ReferenceRuntimeTensorSpec( name="alpha", size_bytes=4, dtype="torch.float32", @@ -344,7 +344,7 @@ def test_prefetch_serving_binding_set_real_cuda_worker_reads_members(tmp_path) - source_root = tmp_path / "public-source-root" artifact_dir = source_root / "model" expected_value = 7.5 - tensor = ReferenceServingTensorSpec( + tensor = ReferenceRuntimeTensorSpec( name="alpha", size_bytes=4, dtype="torch.float32", @@ -369,13 +369,13 @@ def test_prefetch_serving_binding_set_real_cuda_worker_reads_members(tmp_path) - selection_digest = source.trusted_content_artifact_id or source.artifact_id members = [] cache_root = tmp_path / "resolved-spec-cache" - topology = ServingTopologyRef( + topology = RuntimeTopologyRef( schema_topology_digest="vllm-tp2-schema", admission_topology_digest="vllm-tp2-admission", logical_topology_ref="vllm://parallelism?tp=2&pp=1&dp=1", ) for index in range(2): - member = ServingBindingMemberRef( + member = RuntimeBindingMemberRef( member_id=f"dp0:pp0:tp{index}", member_index=index, member_count=2, @@ -401,7 +401,7 @@ def test_prefetch_serving_binding_set_real_cuda_worker_reads_members(tmp_path) - device_uuid=device_uuid_for(0), ) ) - target_set = ServingBindingSetTarget( + target_set = RealizationTargetSet( runtime="vllm", source=members[0].source, topology=topology, diff --git a/tests/python/examples/test_serving_runtime_reference_framework.py b/tests/python/examples/test_runtime_reference_framework.py similarity index 72% rename from tests/python/examples/test_serving_runtime_reference_framework.py rename to tests/python/examples/test_runtime_reference_framework.py index 6e249700..1a81146c 100644 --- a/tests/python/examples/test_serving_runtime_reference_framework.py +++ b/tests/python/examples/test_runtime_reference_framework.py @@ -9,14 +9,14 @@ _EXAMPLE = ( Path(__file__).resolve().parents[3] / "examples" - / "serving_runtime_reference_framework" + / "runtime_reference_framework" / "reference_framework.py" ) def _load_example_module(): 
spec = importlib.util.spec_from_file_location( - "serving_runtime_reference_framework", + "runtime_reference_framework", _EXAMPLE, ) assert spec is not None @@ -26,7 +26,7 @@ def _load_example_module(): return module -def test_reference_framework_uses_public_serving_surfaces_only(): +def test_reference_framework_uses_public_artifact_runtime_surfaces_only(): module = ast.parse(_EXAMPLE.read_text(encoding="utf-8"), filename=str(_EXAMPLE)) imported: set[str] = set() for node in ast.walk(module): @@ -40,10 +40,11 @@ def test_reference_framework_uses_public_serving_surfaces_only(): for name in imported if name == "vllm" or name.startswith("vllm.") - or name == "tensorcast.serving.integration" - or name.startswith("tensorcast.serving.builder") - or name.startswith("tensorcast.serving.admin") - or name.startswith("tensorcast.serving._runtime_impl") + or name == "tensorcast.serving" + or name.startswith("tensorcast.serving.") + or name.startswith("tensorcast.artifact_runtime.recipe.builder") + or name.startswith("tensorcast.artifact_runtime.admin") + or name.startswith("tensorcast.artifact_runtime.lifecycle") } assert forbidden == set() @@ -53,7 +54,7 @@ def test_reference_framework_runs_level1_conformance(): result = module.run_level1_conformance() - assert result.level == "level1-runtime" + assert result.level == "level1-artifact-runtime" assert result.checks["direct_start"] assert result.checks["reload"] assert result.checks["describe"] diff --git a/tests/python/node_agent/test_plan_execution.py b/tests/python/node_agent/test_plan_execution.py index 6270c64b..64d9a98e 100644 --- a/tests/python/node_agent/test_plan_execution.py +++ b/tests/python/node_agent/test_plan_execution.py @@ -15,7 +15,7 @@ from tensorcast.api.store import ( BuilderMode, RepresentationPublishSpec, - ServingBuildIntent, + RuntimeArtifactBuildIntent, build_pure_transform_publication_bundle_from_registered_artifact, build_pure_transform_transform_spec, 
compute_pure_transform_representation_contract_hash, @@ -41,8 +41,8 @@ from tensorcast.proto.node_agent.v1 import node_agent_pb2 from tensorcast.proto.plan.v1 import plan_pb2 from tensorcast.types import ( + RuntimeArtifactManifest, ServerConfig, - ServingArtifactManifest, build_serving_manifest_ref, ) @@ -227,7 +227,7 @@ def register(self, tensors, key, policy): # noqa: ANN001 manifest_tensor = tensors["__tensorcast_meta__.manifest_json"] assert manifest_tensor.dtype == torch.uint8 assert manifest_tensor.ndim == 1 - manifest = ServingArtifactManifest.from_bytes( + manifest = RuntimeArtifactManifest.from_bytes( bytes(manifest_tensor.tolist()) ) assert manifest.framework_name == "torch" @@ -236,7 +236,7 @@ def register(self, tensors, key, policy): # noqa: ANN001 result = adapter.execute_transform_register( spec=build_pure_transform_transform_spec( transform_name="identity.v1", - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v1", @@ -809,7 +809,7 @@ def test_node_agent_servicer_serializes_pure_transform_publication_result() -> N instance_id="inst-1", engine="test", register_identity_transform=False ) bundle = build_pure_transform_publication_bundle_from_registered_artifact( - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( representation_contract_hash="bafkrepresentation", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", diff --git a/tests/python/test_assembly_attempt.py b/tests/python/test_assembly_attempt.py index 6603dee0..6790131b 100644 --- a/tests/python/test_assembly_attempt.py +++ b/tests/python/test_assembly_attempt.py @@ -16,16 +16,16 @@ AssemblyCloseoutContract, AssemblyRequirementSetRef, PublishedModelVersion, - RegisteredServingPublication, + RegisteredRuntimeArtifactPublication, RepresentationPublishContract, RepresentationPublishSpec, - ServingArtifactManifest, - ServingBuildIntent, + 
RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, Store, build_binding_finalize_admission_facts, build_representation_publish_requirements, build_serving_manifest_ref, - prepare_pure_transform_serving_registration, + prepare_pure_transform_runtime_registration, ) from tensorcast.api.store.artifact import Artifact from tensorcast.api.store.common import ( @@ -44,8 +44,8 @@ AssemblyAttemptRef, BindingValueRef, BuilderMode, - ServingPublicationSubject, - ServingSupportLevel, + RuntimePublicationSubject, + RuntimeSupportLevel, ) @@ -192,8 +192,8 @@ def _canonical_index_bytes() -> bytes: return b'{"w":[0,4,[1],[1],"torch.float32",0]}' -def _serving_build_intent() -> ServingBuildIntent: - return ServingBuildIntent( +def _serving_build_intent() -> RuntimeArtifactBuildIntent: + return RuntimeArtifactBuildIntent( representation_contract_hash="bafkrepresentation", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", @@ -204,8 +204,8 @@ def _serving_build_intent() -> ServingBuildIntent: ) -def _binding_finalize_build_intent() -> ServingBuildIntent: - return ServingBuildIntent( +def _binding_finalize_build_intent() -> RuntimeArtifactBuildIntent: + return RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -217,7 +217,7 @@ def _binding_finalize_build_intent() -> ServingBuildIntent: def _representation_publish_bundle() -> RepresentationPublishSpec: - manifest = ServingArtifactManifest( + manifest = RuntimeArtifactManifest( framework_name="torch", adapter_version="adapter-v1", serving_abi_version="abi-v1", @@ -230,7 +230,7 @@ def _representation_publish_bundle() -> RepresentationPublishSpec: build_pipeline_version="pipeline-v1", ) contract = RepresentationPublishContract( - subject=ServingPublicationSubject( + subject=RuntimePublicationSubject( serving_artifact_id="mi2:test:serving", ), serving_manifest_ref=build_serving_manifest_ref(), @@ -286,7 +286,7 @@ def 
test_register_pure_transform_publication_registers_manifest_bearing_artifact store = Store("fake://daemon", runtime=runtime) tensors = {"w": torch.ones((1,), dtype=torch.float32)} build_intent = _serving_build_intent() - prepared = prepare_pure_transform_serving_registration( + prepared = prepare_pure_transform_runtime_registration( build_intent=build_intent, source_artifact=None, tensors=tensors, @@ -390,7 +390,7 @@ def test_complete_pure_transform_publication_runs_register_and_closeout() -> Non store = Store("fake://daemon", runtime=runtime) tensors = {"w": torch.ones((1,), dtype=torch.float32)} build_intent = _serving_build_intent() - prepared = prepare_pure_transform_serving_registration( + prepared = prepare_pure_transform_runtime_registration( build_intent=build_intent, source_artifact=None, tensors=tensors, @@ -457,7 +457,7 @@ def test_complete_pure_transform_publication_canonical_full_routes_source_artifa runtime = FakeRuntime(client) store = Store("fake://daemon", runtime=runtime) captured: dict[str, object] = {} - prepared = prepare_pure_transform_serving_registration( + prepared = prepare_pure_transform_runtime_registration( build_intent=_serving_build_intent(), source_artifact=source_artifact, tensors={"w": torch.ones((1,), dtype=torch.float32)}, @@ -466,13 +466,13 @@ def test_complete_pure_transform_publication_canonical_full_routes_source_artifa def _register( tensors: dict[str, torch.Tensor], **kwargs: object, - ) -> RegisteredServingPublication: + ) -> RegisteredRuntimeArtifactPublication: del tensors del kwargs bundle = _representation_publish_bundle().model_copy( update={"contract_family": "canonical_full"} ) - return RegisteredServingPublication( + return RegisteredRuntimeArtifactPublication( registered_artifact=RegisteredArtifact( artifact_id="mi2:test:serving", replica=ReplicaInfo( @@ -577,13 +577,13 @@ def test_complete_pure_transform_publication_routes_structural_view_contribution def _register_publication( tensors: dict[str, torch.Tensor], 
**kwargs: object, - ) -> RegisteredServingPublication: + ) -> RegisteredRuntimeArtifactPublication: del tensors del kwargs bundle = _representation_publish_bundle().model_copy( update={"contract_family": "pp"} ) - return RegisteredServingPublication( + return RegisteredRuntimeArtifactPublication( registered_artifact=RegisteredArtifact( artifact_id="mi2:test:serving", replica=ReplicaInfo( @@ -596,7 +596,7 @@ def _register_publication( canonical_index=canonical_index_from_bytes(_canonical_index_bytes()), lease=None, ), - prepared_registration=prepare_pure_transform_serving_registration( + prepared_registration=prepare_pure_transform_runtime_registration( build_intent=_serving_build_intent(), source_artifact=source_artifact, tensors={"w": torch.ones((1,), dtype=torch.float32)}, @@ -801,7 +801,7 @@ def test_start_representation_publish_attempt_provisions_binding_subject_layout_ canonical_index = canonical_index_from_bytes( b'{"serving.weight":[0,4,[1],[1],"torch.float32",0]}' ) - manifest = ServingArtifactManifest( + manifest = RuntimeArtifactManifest( framework_name="torch", adapter_version="adapter-vbinding", serving_abi_version="abi-vbinding", @@ -814,7 +814,7 @@ def test_start_representation_publish_attempt_provisions_binding_subject_layout_ build_pipeline_version="pipeline-vbinding", ) contract = RepresentationPublishContract( - subject=ServingPublicationSubject( + subject=RuntimePublicationSubject( binding_value_ref=BindingValueRef( binding_id="binding-1", binding_layout_id="layout-1", @@ -839,7 +839,7 @@ def test_start_representation_publish_attempt_provisions_binding_subject_layout_ representation_publish_contract=contract, ), admission_facts=build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=True, ), contract_family="canonical_full", @@ -1291,7 +1291,7 @@ def 
test_representation_publish_closeout_contract_accepts_matching_typed_child() serving_version_key="models/demo/serving/v3", serving_manifest_ref=build_serving_manifest_ref(), representation_publish_contract=RepresentationPublishContract( - subject=ServingPublicationSubject( + subject=RuntimePublicationSubject( serving_artifact_id="mi2:test:serving", ), serving_manifest_ref=build_serving_manifest_ref(), @@ -1331,7 +1331,7 @@ def test_representation_publish_closeout_contract_rejects_outer_serving_artifact serving_artifact_id="mi2:test:serving", serving_manifest_ref=build_serving_manifest_ref(), representation_publish_contract=RepresentationPublishContract( - subject=ServingPublicationSubject( + subject=RuntimePublicationSubject( serving_artifact_id="mi2:test:serving", ), serving_manifest_ref=build_serving_manifest_ref(), @@ -1348,7 +1348,7 @@ def test_representation_publish_closeout_contract_accepts_binding_subject_child( kind="representation_publish", serving_manifest_ref=build_serving_manifest_ref(), representation_publish_contract=RepresentationPublishContract( - subject=ServingPublicationSubject( + subject=RuntimePublicationSubject( binding_value_ref=BindingValueRef( binding_id="binding-1", binding_layout_id="layout-1", diff --git a/tests/python/test_binding.py b/tests/python/test_binding.py index 1cfcb237..c5d1fc38 100644 --- a/tests/python/test_binding.py +++ b/tests/python/test_binding.py @@ -38,8 +38,8 @@ HashLocation, IdentityMintStrategy, PublishedModelVersion, + RuntimeArtifactPolicy, ServerConfig, - ServingRuntimePolicy, SourceBoundCapability, SourceBoundPlanDiagnostics, VramRegionHandle, @@ -322,7 +322,7 @@ def commit_binding_artifact(self, **kwargs: Any) -> Any: current_value=self._make_binding_value( binding_id=binding_id, selection=selection, - ) + ), ) def begin_binding_update(self, **kwargs: Any) -> Any: @@ -701,7 +701,7 @@ def test_binding_swap_forwards_first_class_execution_topology( assert execution_topology.source_sharing_domain == "node-a" 
assert ( refill_call["collective_policy"] - == store_daemon_pb2.COLLECTIVE_POLICY_REQUIRE_COLLECTIVE + == store_daemon_pb2.COLLECTIVE_POLICY_COLLECTIVE_FIRST ) assert "clid=same-host-tp-load" not in str(refill_call["operation_id"]) @@ -1103,6 +1103,42 @@ def test_binding_realize_from_accepts_rank_zero_collective_group( assert "clid=same-host-tp-load" not in str(refill_call["operation_id"]) +def test_binding_realize_from_defaults_collective_group_to_collective_first( + monkeypatch: pytest.MonkeyPatch, +) -> None: + store, _runtime, client = _setup_store(monkeypatch) + artifact = store.artifact(artifact_id="artifact-1") + layout = artifact.bind(device="cuda:0", packing="byte_space").layout + binding = store.create_binding(layout, ownership="daemon", device="cuda:0") + + binding.realize_from( + artifact, + realization_plan=( + store_mod.BindingRealizationEntry( + op="copy", + source_name="alpha", + dst_name="alpha", + ), + ), + options=GetArtifactOptions( + execution_topology=ExecutionTopologyContext( + collective_group=CollectiveLoadGroup( + group_id="same-host-tp-load", + world_size=8, + rank=0, + ), + source_locality="shared_source", + ) + ), + ) + + refill_call = client.refill_calls[-1] + assert ( + refill_call["collective_policy"] + == store_daemon_pb2.COLLECTIVE_POLICY_COLLECTIVE_FIRST + ) + + def test_binding_realize_from_serializes_partial_const_fill_ranges( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -1855,7 +1891,7 @@ def _wait_attempt( result = store.complete_binding_finalize_publication_from_binding( binding, - build_intent=store_mod.ServingBuildIntent( + build_intent=store_mod.RuntimeArtifactBuildIntent( builder_mode=store_mod.BuilderMode.BINDING_FINALIZE, framework_name="pytest", adapter_version="adapter-v1", @@ -1865,7 +1901,7 @@ def _wait_attempt( source_artifact_ref="mi2:test:source", ), admission_facts=store_mod.build_binding_finalize_admission_facts( - support_level=store_mod.ServingSupportLevel.BUILDER_PUBLICATION_READY, + 
support_level=store_mod.RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=True, ), contract_family="canonical_full", @@ -2280,10 +2316,10 @@ def test_binding_swap_coerces_published_model_version_into_runtime_policy() -> N serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"), ) - binding.swap("artifact-2", serving_runtime_policy=version) + binding.swap("artifact-2", runtime_artifact_policy=version) assert len(slot.swap_calls) == 1 - assert slot.swap_calls[0]["serving_runtime_policy"] == ServingRuntimePolicy( + assert slot.swap_calls[0]["runtime_artifact_policy"] == RuntimeArtifactPolicy( require_manifest=True, serving_manifest_ref="tensor:__alt_manifest__.json", expected_representation_contract_hash="bafkrepresentation", @@ -2291,6 +2327,21 @@ def test_binding_swap_coerces_published_model_version_into_runtime_policy() -> N ) +def test_binding_swap_uses_only_runtime_artifact_policy_name() -> None: + slot = _FakeBindingSlot() + binding = Binding(slot) + neutral_policy = RuntimeArtifactPolicy(serving_manifest_ref="tensor:a.json") + + with pytest.raises(TypeError, match="serving_runtime_policy"): + binding.swap( + "artifact-2", + runtime_artifact_policy=neutral_policy, + serving_runtime_policy=neutral_policy, + ) + + assert slot.swap_calls == [] + + def test_bind_does_not_delegate_to_bind_into(monkeypatch: pytest.MonkeyPatch) -> None: store, _runtime, _client = _setup_store(monkeypatch) artifact = store.artifact(artifact_id="artifact-1") diff --git a/tests/python/test_dense_piece_assembly_sealing_acceptance.py b/tests/python/test_dense_piece_assembly_sealing_acceptance.py index 64e8da7d..8b2667e1 100644 --- a/tests/python/test_dense_piece_assembly_sealing_acceptance.py +++ b/tests/python/test_dense_piece_assembly_sealing_acceptance.py @@ -28,7 +28,7 @@ AssemblyReadinessPolicy, AssemblyRequirementSetRef, BuilderMode, - ServingBuildIntent, + RuntimeArtifactBuildIntent, Store, build_serving_manifest_ref, ) @@ -1043,7 
+1043,7 @@ def test_complete_pure_transform_publication_publishes_serving_lineage( result = store.complete_pure_transform_publication( source_tensors, - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v1", @@ -1124,7 +1124,7 @@ def test_complete_pure_transform_publication_structural_pp_publishes_serving_lin result = store.complete_pure_transform_publication( source_tensors, - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v1", @@ -1196,7 +1196,7 @@ def test_complete_pure_transform_publication_serving_binding_swap( source_handle_v1 = store.artifact(artifact_id=source_artifact_v1) result_v1 = store.complete_pure_transform_publication( _artifact_tensor_dict(store, artifact_id=source_artifact_v1), - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v1", @@ -1230,7 +1230,7 @@ def test_complete_pure_transform_publication_serving_binding_swap( source_handle_v2 = store.artifact(artifact_id=source_artifact_v2) result_v2 = store.complete_pure_transform_publication( _artifact_tensor_dict(store, artifact_id=source_artifact_v2), - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v1", @@ -1253,7 +1253,7 @@ def test_complete_pure_transform_publication_serving_binding_swap( binding = store.artifact(key="models/demo/serving/swap/v1").bind( device="cuda:0", packing="byte_space", - serving_runtime_policy=result_v1.require_serving_runtime_policy(), + runtime_artifact_policy=result_v1.require_runtime_artifact_policy(), ) torch.testing.assert_close( binding.tensors["weights"].cpu(), @@ -1267,7 +1267,7 @@ def 
test_complete_pure_transform_publication_serving_binding_swap( ) binding.swap( store.artifact(artifact_id=str(result_v2.serving_artifact_id)), - serving_runtime_policy=result_v2.require_serving_runtime_policy(), + runtime_artifact_policy=result_v2.require_runtime_artifact_policy(), ) synchronize_cuda() torch.testing.assert_close( diff --git a/tests/python/test_pytorch_module_binding.py b/tests/python/test_pytorch_module_binding.py index 90bfa93b..217356ac 100644 --- a/tests/python/test_pytorch_module_binding.py +++ b/tests/python/test_pytorch_module_binding.py @@ -6,6 +6,7 @@ import torch from torch import nn +from tensorcast.artifact_runtime.host import TorchTensorHost from tensorcast.pytorch.module_binding import ( TorchModuleAdapterMixin, align_runtime_binding_exclude_names, @@ -19,22 +20,18 @@ snapshot_tensor_invariants, validate_tensor_invariants, ) -from tensorcast.serving.hosts import TorchTensorHost class _TaggedParameter(nn.Parameter): pass -def test_attach_tensors_materializes_meta_parameter_aliases_and_subclass( -) -> None: - +def test_attach_tensors_materializes_meta_parameter_aliases_and_subclass() -> None: class _Model(nn.Module): - def __init__(self) -> None: super().__init__() self.w = _TaggedParameter( - torch.empty((2, ), device="meta", dtype=torch.float32), + torch.empty((2,), device="meta", dtype=torch.float32), requires_grad=False, ) self.alias = self.w @@ -62,13 +59,12 @@ def __init__(self) -> None: def test_attach_tensors_materializes_meta_buffer_aliases() -> None: - class _Model(nn.Module): - def __init__(self) -> None: super().__init__() self.register_buffer( - "b", torch.empty((2, ), device="meta", dtype=torch.float32)) + "b", torch.empty((2,), device="meta", dtype=torch.float32) + ) self.register_buffer("alias_b", self.b) self.captured = [self.b] @@ -94,58 +90,54 @@ def __init__(self) -> None: def test_attach_tensors_skips_reserved_tensorcast_names() -> None: model = nn.Module() - model.register_parameter("w", nn.Parameter(torch.zeros((1, 
)))) + model.register_parameter("w", nn.Parameter(torch.zeros((1,)))) result = attach_tensors_to_module( model, { - "w": - torch.ones((1, )), - "__tensorcast_meta__.manifest_json": - torch.ones((4, ), dtype=torch.uint8), + "w": torch.ones((1,)), + "__tensorcast_meta__.manifest_json": torch.ones((4,), dtype=torch.uint8), }, replace_meta_params=False, fail_on_missing=False, ) - assert result.attached == ("w", ) - assert result.skipped == ("__tensorcast_meta__.manifest_json", ) - assert torch.equal(model.w, torch.ones((1, ))) + assert result.attached == ("w",) + assert result.skipped == ("__tensorcast_meta__.manifest_json",) + assert torch.equal(model.w, torch.ones((1,))) def test_attach_tensors_fail_closed_on_missing_and_unexpected_names() -> None: model = nn.Module() - model.register_parameter("w", nn.Parameter(torch.zeros((1, )))) - model.register_buffer("b", torch.zeros((1, ))) + model.register_parameter("w", nn.Parameter(torch.zeros((1,)))) + model.register_buffer("b", torch.zeros((1,))) with pytest.raises(RuntimeError, match="missing required"): attach_tensors_to_module( model, - {"w": torch.ones((1, ))}, + {"w": torch.ones((1,))}, replace_meta_params=False, ) with pytest.raises(RuntimeError, match="unexpected tensor names"): attach_tensors_to_module( model, - {"unexpected": torch.ones((1, ))}, + {"unexpected": torch.ones((1,))}, replace_meta_params=False, fail_on_missing=False, ) -def test_collect_module_tensors_handles_excludes_reserved_and_duplicates( -) -> None: +def test_collect_module_tensors_handles_excludes_reserved_and_duplicates() -> None: model = nn.Module() - model.register_parameter( - "w", nn.Parameter(torch.ones((1, ), dtype=torch.float32))) + model.register_parameter("w", nn.Parameter(torch.ones((1,), dtype=torch.float32))) model.register_parameter("alias", model.w) model.register_parameter( "other_meta", - nn.Parameter(torch.empty((1, ), device="meta", dtype=torch.float32)), + nn.Parameter(torch.empty((1,), device="meta", dtype=torch.float32)), 
) reserved = nn.Module() - reserved.register_buffer("manifest_json", torch.ones((1, ))) + reserved.register_buffer("manifest_json", torch.ones((1,))) model.add_module("__tensorcast_meta__", reserved) with pytest.raises(RuntimeError, match="reserved names"): @@ -167,23 +159,17 @@ def test_collect_module_tensors_handles_excludes_reserved_and_duplicates( def test_allocate_unbound_module_tensors_materializes_aliases() -> None: - class _Model(nn.Module): - def __init__(self) -> None: super().__init__() self.w = nn.Parameter( - torch.empty_strided((2, 3), (3, 1), - device="meta", - dtype=torch.float16), + torch.empty_strided((2, 3), (3, 1), device="meta", dtype=torch.float16), requires_grad=False, ) self.alias = self.w self.register_buffer( "b", - torch.empty_strided((4, ), (1, ), - device="meta", - dtype=torch.float32), + torch.empty_strided((4,), (1,), device="meta", dtype=torch.float32), ) self.register_buffer("alias_b", self.b) @@ -203,7 +189,7 @@ def __init__(self) -> None: assert model.w.shape == (2, 3) assert model.w.stride() == (3, 1) assert model.w.dtype == torch.float16 - assert model.b.shape == (4, ) + assert model.b.shape == (4,) assert model.b.dtype == torch.float32 assert allocated["w"].data_ptr() == model.w.data.data_ptr() assert allocated["b"].data_ptr() == model.b.data_ptr() @@ -211,8 +197,8 @@ def __init__(self) -> None: def test_align_and_assert_runtime_tensor_names() -> None: model = nn.Module() - model.register_parameter("w", nn.Parameter(torch.ones((1, )))) - model.register_buffer("runtime_only", torch.ones((1, ))) + model.register_parameter("w", nn.Parameter(torch.ones((1,)))) + model.register_buffer("runtime_only", torch.ones((1,))) captured: list[tuple[str, ...]] = [] count = align_runtime_binding_exclude_names( @@ -223,64 +209,59 @@ def test_align_and_assert_runtime_tensor_names() -> None: ) assert count == 1 - assert captured == [("runtime_only", )] + assert captured == [("runtime_only",)] assert collect_module_tensor_names(model) == {"w", 
"runtime_only"} assert_runtime_tensors_match_expected_names({"w": model.w}, {"w"}) with pytest.raises(RuntimeError, match="tensor set mismatch"): - assert_runtime_tensors_match_expected_names({"w": model.w}, - {"missing"}) + assert_runtime_tensors_match_expected_names({"w": model.w}, {"missing"}) def test_assert_module_tensors_are_meta_reports_materialized_tensors() -> None: meta_model = nn.Module() meta_model.register_parameter( "w", - nn.Parameter(torch.empty((1, ), device="meta")), + nn.Parameter(torch.empty((1,), device="meta")), ) assert_module_tensors_are_meta(meta_model, context="test context") materialized = nn.Module() - materialized.register_parameter("w", nn.Parameter(torch.ones((2, )))) + materialized.register_parameter("w", nn.Parameter(torch.ones((2,)))) with pytest.raises(RuntimeError, match="test context"): assert_module_tensors_are_meta(materialized, context="test context") def test_runtime_tensor_schema_hash_and_invariants() -> None: - tensors = {"w": torch.ones((2, ), dtype=torch.float32)} + tensors = {"w": torch.ones((2,), dtype=torch.float32)} schema_hash = compute_runtime_tensor_schema_hash(tensors) before = snapshot_tensor_invariants(tensors) validate_tensor_invariants(before, tensors) assert schema_hash - changed = {"w": torch.ones((3, ), dtype=torch.float32)} + changed = {"w": torch.ones((3,), dtype=torch.float32)} with pytest.raises(RuntimeError, match="invariant changed"): validate_tensor_invariants(before, changed) def test_torch_module_adapter_mixin_provides_default_binding_ops() -> None: - class _Adapter(TorchModuleAdapterMixin): - - def runtime_only_tensor_names(self, - model: nn.Module) -> tuple[str, ...]: + def runtime_only_tensor_names(self, model: nn.Module) -> tuple[str, ...]: del model - return ("runtime_only", ) + return ("runtime_only",) model = nn.Module() model.register_parameter( "w", - nn.Parameter(torch.ones((1, ), dtype=torch.float32), - requires_grad=False), + nn.Parameter(torch.ones((1,), dtype=torch.float32), 
requires_grad=False), ) model.register_buffer( "runtime_only", - torch.empty((1, ), device="meta", dtype=torch.float32), + torch.empty((1,), device="meta", dtype=torch.float32), ) adapter = _Adapter() tensors = adapter.collect_runtime_binding_tensors(model) - assert tuple(tensors) == ("w", ) + assert tuple(tensors) == ("w",) assert adapter.compute_runtime_tensor_schema_hash(tensors) bound = torch.tensor([2.0], dtype=torch.float32) @@ -296,15 +277,11 @@ def runtime_only_tensor_names(self, adapter.validate_tensor_invariants(invariants, {"w": model.w}) -def test_torch_module_adapter_mixin_rehydrates_runtime_only_tensors( -) -> None: - +def test_torch_module_adapter_mixin_rehydrates_runtime_only_tensors() -> None: class _Adapter(TorchModuleAdapterMixin): - - def runtime_only_tensor_names(self, - model: nn.Module) -> tuple[str, ...]: + def runtime_only_tensor_names(self, model: nn.Module) -> tuple[str, ...]: del model - return ("runtime_only", ) + return ("runtime_only",) def rehydrate_runtime_only_tensors( self, @@ -313,14 +290,14 @@ def rehydrate_runtime_only_tensors( target_device: torch.device, ) -> Mapping[str, torch.Tensor]: assert set(allocated) == {"runtime_only"} - tensor = torch.full((2, ), 7.0, device=target_device) + tensor = torch.full((2,), 7.0, device=target_device) model._buffers["runtime_only"] = tensor return {"runtime_only": tensor} model = nn.Module() model.register_buffer( "runtime_only", - torch.empty((2, ), device="meta", dtype=torch.float32), + torch.empty((2,), device="meta", dtype=torch.float32), ) allocated = _Adapter().allocate_runtime_only_tensors( @@ -328,18 +305,15 @@ def rehydrate_runtime_only_tensors( torch.device("cpu"), ) - assert torch.equal(model.runtime_only, torch.full((2, ), 7.0)) - assert torch.equal(allocated["runtime_only"], torch.full((2, ), 7.0)) + assert torch.equal(model.runtime_only, torch.full((2,), 7.0)) + assert torch.equal(allocated["runtime_only"], torch.full((2,), 7.0)) def 
test_torch_tensor_host_rehydrates_runtime_only_tensors() -> None: - class _Surface(TorchTensorHost): - - def runtime_only_tensor_names(self, - model: object) -> tuple[str, ...]: + def runtime_only_tensor_names(self, model: object) -> tuple[str, ...]: del model - return ("runtime_only", ) + return ("runtime_only",) def rehydrate_runtime_only_tensors( self, @@ -348,14 +322,14 @@ def rehydrate_runtime_only_tensors( target_device: object, ) -> Mapping[str, object]: del allocated - tensor = torch.full((2, ), 11.0, device=target_device) + tensor = torch.full((2,), 11.0, device=target_device) model._buffers["runtime_only"] = tensor return {"runtime_only": tensor} model = nn.Module() model.register_buffer( "runtime_only", - torch.empty((2, ), device="meta", dtype=torch.float32), + torch.empty((2,), device="meta", dtype=torch.float32), ) allocated = _Surface().allocate_runtime_only_tensors( @@ -363,5 +337,5 @@ def rehydrate_runtime_only_tensors( torch.device("cpu"), ) - assert torch.equal(model.runtime_only, torch.full((2, ), 11.0)) - assert torch.equal(allocated["runtime_only"], torch.full((2, ), 11.0)) + assert torch.equal(model.runtime_only, torch.full((2,), 11.0)) + assert torch.equal(allocated["runtime_only"], torch.full((2,), 11.0)) diff --git a/tests/python/test_pytorch_trace_capture.py b/tests/python/test_pytorch_trace_capture.py index 6ef58a13..683f52da 100644 --- a/tests/python/test_pytorch_trace_capture.py +++ b/tests/python/test_pytorch_trace_capture.py @@ -8,8 +8,8 @@ from torch import nn from tensorcast.pytorch.trace_capture import TraceActivation, trace_model_load -from tensorcast.serving.builder.materialization import apply_copy_plan -from tensorcast.serving.builder.trace_ir import MultiRange, Range +from tensorcast.artifact_runtime.recipe.materialization import apply_copy_plan +from tensorcast.artifact_runtime.recipe.trace_ir import MultiRange, Range @dataclass(frozen=True) diff --git a/tests/python/test_serving_publication_types.py 
b/tests/python/test_runtime_publication_types.py similarity index 72% rename from tests/python/test_serving_publication_types.py rename to tests/python/test_runtime_publication_types.py index ab32c823..8c7c3a4b 100644 --- a/tests/python/test_serving_publication_types.py +++ b/tests/python/test_runtime_publication_types.py @@ -2,6 +2,7 @@ from __future__ import annotations +import importlib.util import inspect from pathlib import Path @@ -18,13 +19,13 @@ build_pure_transform_publication_bundle_from_registered_artifact, build_pure_transform_publication_spec, build_pure_transform_transform_spec, - build_serving_publication_bundle_from_registered_artifact, + build_runtime_artifact_publication_bundle_from_registered_artifact, compute_pure_transform_representation_contract_hash, - compute_serving_tensor_schema_hash, - count_canonical_serving_tensors, - prepare_binding_finalize_serving_registration, - prepare_pure_transform_serving_registration, - prepare_serving_registration, + compute_runtime_artifact_tensor_schema_hash, + count_canonical_runtime_tensors, + prepare_binding_finalize_runtime_registration, + prepare_pure_transform_runtime_registration, + prepare_runtime_artifact_registration, ) from tensorcast.api.store.handles import RegisteredArtifact from tensorcast.api.store.types import ( @@ -43,14 +44,14 @@ PublishedModelVersion, RepresentationPublishContract, RepresentationPublishSpec, - ServingAdmissionFacts, - ServingArtifactManifest, - ServingBuildIntent, - ServingPublicationSubject, - ServingRuntimePolicy, - ServingSupportLevel, + RuntimeAdmissionFacts, + RuntimeArtifactBuildIntent, + RuntimeArtifactManifest, + RuntimeArtifactPolicy, + RuntimePublicationSubject, + RuntimeSupportLevel, build_serving_manifest_ref, - coerce_serving_runtime_policy, + coerce_runtime_artifact_policy, parse_serving_manifest_ref, ) from tensorcast.types import ArtifactDescriptor as PublishedArtifactDescriptor @@ -69,9 +70,8 @@ def _canonical_index( ) -def 
test_serving_build_digest_ignores_source_and_semantic_hash_inputs( -) -> None: - intent_a = ServingBuildIntent( +def test_serving_build_digest_ignores_source_and_semantic_hash_inputs() -> None: + intent_a = RuntimeArtifactBuildIntent( representation_contract_hash="bafksemantic-a", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -80,7 +80,7 @@ def test_serving_build_digest_ignores_source_and_semantic_hash_inputs( build_pipeline_version="pipeline-v1", source_artifact_ref="mi2:source-a", ) - intent_b = ServingBuildIntent( + intent_b = RuntimeArtifactBuildIntent( representation_contract_hash="bafksemantic-b", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -90,26 +90,42 @@ def test_serving_build_digest_ignores_source_and_semantic_hash_inputs( source_artifact_ref="mi2:source-b", ) - assert (intent_a.compute_serving_build_digest() == - intent_b.compute_serving_build_digest()) + assert ( + intent_a.compute_serving_build_digest() + == intent_b.compute_serving_build_digest() + ) -def test_tensorcast_top_level_exports_cover_vllm_serving_contract() -> None: - assert tc.prepare_serving_registration is prepare_serving_registration - assert (tc.prepare_binding_finalize_serving_registration - is prepare_binding_finalize_serving_registration) - assert (tc.build_serving_publication_bundle_from_registered_artifact - is build_serving_publication_bundle_from_registered_artifact) +def test_tensorcast_top_level_hides_runtime_publication_helpers() -> None: + assert not hasattr(tc, "prepare_runtime_artifact_registration") + assert not hasattr(tc, "prepare_binding_finalize_runtime_registration") + assert not hasattr( + tc, + "build_runtime_artifact_publication_bundle_from_registered_artifact", + ) + assert not hasattr(tc, "RuntimeAdmissionFacts") + assert not hasattr(tc, "ServingAdmissionFacts") + assert not hasattr(tc, "ServingSupportLevel") + assert not hasattr(tc, "ServingPublicationSubject") + assert not hasattr(tc, 
"prepare_serving_registration") + assert not hasattr(tc, "prepare_binding_finalize_serving_registration") + assert not hasattr(tc, "prepare_pure_transform_serving_registration") + assert not hasattr(tc, "build_serving_publication_bundle") + assert not hasattr(tc, "SERVING_BUILD_DIGEST_VERSION") assert tc.PublishedModelVersion is PublishedModelVersion assert tc.RepresentationPublishContract is RepresentationPublishContract - assert tc.ServingAdmissionFacts is ServingAdmissionFacts - assert tc.ServingArtifactManifest is ServingArtifactManifest - assert tc.SERVING_BUILD_DIGEST_VERSION == SERVING_BUILD_DIGEST_VERSION - assert tc.ServingRuntimePolicy is ServingRuntimePolicy + assert tc.RuntimeArtifactManifest is RuntimeArtifactManifest + assert tc.RuntimeArtifactPolicy is RuntimeArtifactPolicy + assert SERVING_BUILD_DIGEST_VERSION == "tensorcast.serving_build_digest.v1" + +def test_legacy_serving_builder_module_path_is_removed() -> None: + spec = importlib.util.find_spec("tensorcast.api.store.serving_builder") + assert spec is None -def test_serving_artifact_manifest_round_trips_via_json_payload() -> None: - intent = ServingBuildIntent( + +def test_runtime_artifact_manifest_round_trips_via_json_payload() -> None: + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafksemantic", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -118,23 +134,22 @@ def test_serving_artifact_manifest_round_trips_via_json_payload() -> None: build_pipeline_version="pipeline-v2", source_artifact_ref="mi2:source", ) - manifest = ServingArtifactManifest.from_build_intent( + manifest = RuntimeArtifactManifest.from_build_intent( intent=intent, tensor_schema_hash="bafktensorschema", canonical_tensor_count=17, ) - restored = ServingArtifactManifest.from_bytes(manifest.to_bytes()) + restored = RuntimeArtifactManifest.from_bytes(manifest.to_bytes()) assert restored == manifest assert restored.serving_manifest_ref == build_serving_manifest_ref() - assert 
restored.serving_build_digest == intent.compute_serving_build_digest( - ) + assert restored.serving_build_digest == intent.compute_serving_build_digest() assert restored.serving_build_digest_version == SERVING_BUILD_DIGEST_VERSION def test_representation_publish_contract_matches_serving_manifest() -> None: - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafksemantic", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", @@ -142,48 +157,53 @@ def test_representation_publish_contract_matches_serving_manifest() -> None: serving_abi_version="abi-v3", build_pipeline_version="pipeline-v3", ) - manifest = ServingArtifactManifest.from_build_intent( + manifest = RuntimeArtifactManifest.from_build_intent( intent=intent, tensor_schema_hash="bafktensorschema", canonical_tensor_count=9, ) contract = RepresentationPublishContract( - subject=ServingPublicationSubject( - serving_artifact_id="mi2:test:serving", ), + subject=RuntimePublicationSubject( + serving_artifact_id="mi2:test:serving", + ), serving_manifest_ref=build_serving_manifest_ref(), representation_contract_hash=manifest.representation_contract_hash, serving_build_digest=manifest.serving_build_digest, ) contract.validate_against_manifest(manifest) - assert (parse_serving_manifest_ref( - contract.serving_manifest_ref) == "__tensorcast_meta__.manifest_json") + assert ( + parse_serving_manifest_ref(contract.serving_manifest_ref) + == "__tensorcast_meta__.manifest_json" + ) runtime_policy = contract.to_runtime_policy() assert runtime_policy.require_manifest is True assert runtime_policy.serving_manifest_ref == build_serving_manifest_ref() - assert (runtime_policy.expected_representation_contract_hash == - manifest.representation_contract_hash) + assert ( + runtime_policy.expected_representation_contract_hash + == manifest.representation_contract_hash + ) -def test_serving_admission_facts_require_fast_path_validation() -> None: - with pytest.raises(ValueError, 
- match="same_binding_fast_path_validated=True"): - ServingAdmissionFacts( +def test_runtime_admission_facts_require_fast_path_validation() -> None: + with pytest.raises(ValueError, match="same_binding_fast_path_validated=True"): + RuntimeAdmissionFacts( finalize_class=FinalizeClass.REPRESENTATION_CHANGING, - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=False, ) -def test_representation_publish_contract_accepts_binding_value_subject( -) -> None: +def test_representation_publish_contract_accepts_binding_value_subject() -> None: contract = RepresentationPublishContract( - subject=ServingPublicationSubject(binding_value_ref=BindingValueRef( - binding_id="binding-1", - binding_layout_id="layout-1", - binding_value_id="value-1", - seal_generation=7, - )), + subject=RuntimePublicationSubject( + binding_value_ref=BindingValueRef( + binding_id="binding-1", + binding_layout_id="layout-1", + binding_value_id="value-1", + seal_generation=7, + ) + ), serving_manifest_ref=build_serving_manifest_ref(), representation_contract_hash="bafkrepresentation", serving_build_digest="bafkbuilddigest", @@ -192,20 +212,22 @@ def test_representation_publish_contract_accepts_binding_value_subject( assert contract.serving_artifact_id is None assert contract.binding_value_ref is not None restored = RepresentationPublishContract.from_publication_proto( - contract.to_publication_proto()) + contract.to_publication_proto() + ) assert restored.binding_value_ref is not None assert restored.binding_value_ref.binding_id == "binding-1" -def test_binding_subject_contract_rejects_runtime_policy_until_promoted( -) -> None: +def test_binding_subject_contract_rejects_runtime_policy_until_promoted() -> None: contract = RepresentationPublishContract( - subject=ServingPublicationSubject(binding_value_ref=BindingValueRef( - binding_id="binding-2", - binding_layout_id="layout-2", - 
binding_value_id="value-2", - seal_generation=3, - )), + subject=RuntimePublicationSubject( + binding_value_ref=BindingValueRef( + binding_id="binding-2", + binding_layout_id="layout-2", + binding_value_id="value-2", + seal_generation=3, + ) + ), serving_manifest_ref=build_serving_manifest_ref(), representation_contract_hash="bafkrepresentation", serving_build_digest="bafkbuilddigest", @@ -215,34 +237,37 @@ def test_binding_subject_contract_rejects_runtime_policy_until_promoted( contract.to_runtime_policy() -def test_build_binding_finalize_admission_facts_requires_same_binding_proof( -) -> None: +def test_build_binding_finalize_admission_facts_requires_same_binding_proof() -> None: facts = build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=True, ) assert facts.finalize_class == FinalizeClass.REPRESENTATION_CHANGING assert facts.same_binding_fast_path_validated is True - assert facts.support_level == ServingSupportLevel.BUILDER_PUBLICATION_READY + assert facts.support_level == RuntimeSupportLevel.BUILDER_PUBLICATION_READY -def test_build_binding_finalize_publication_bundle_has_no_artifact_subject_parameter( -) -> (None): - assert ("serving_artifact" not in inspect.signature( - build_binding_finalize_publication_bundle).parameters) +def test_build_binding_finalize_publication_bundle_has_no_artifact_subject_parameter() -> ( + None +): + assert ( + "runtime_artifact" + not in inspect.signature(build_binding_finalize_publication_bundle).parameters + ) canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, - ), ) - intent = ServingBuildIntent( + ), + ) + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", framework_name="torch", 
adapter_version="adapter-mounted-source", @@ -257,25 +282,27 @@ def test_build_binding_finalize_publication_bundle_has_no_artifact_subject_param build_intent=intent, canonical_index=canonical_index, admission_facts=build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=True, ), ) -def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject( -) -> (None): +def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject() -> ( + None +): canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, - ), ) - intent = ServingBuildIntent( + ), + ) + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -296,7 +323,7 @@ def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject ), canonical_index=canonical_index, admission_facts=build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=True, ), ) @@ -306,58 +333,64 @@ def test_build_binding_finalize_publication_bundle_accepts_binding_value_subject assert bundle.representation_publish_contract.binding_value_ref is not None -def test_compute_serving_tensor_schema_hash_excludes_reserved_manifest_tensor( -) -> None: +def test_compute_runtime_artifact_tensor_schema_hash_excludes_reserved_manifest_tensor() -> ( + None +): canonical_without_manifest = CanonicalIndex( - entries=(CanonicalIndexEntry( - name="weights", - dtype=torch.float16, - shape=(8, ), - stride=(1, ), - storage_offset=0, - segment_offset=0, - size_bytes=16, - ), ), + entries=( + 
CanonicalIndexEntry( + name="weights", + dtype=torch.float16, + shape=(8,), + stride=(1,), + storage_offset=0, + segment_offset=0, + size_bytes=16, + ), + ), total_size_bytes=16, avbs_hash="bafkavbs", ) canonical_with_manifest = CanonicalIndex( - entries=canonical_without_manifest.entries + (CanonicalIndexEntry( - name="__tensorcast_meta__.manifest_json", - dtype=torch.uint8, - shape=(32, ), - stride=(1, ), - storage_offset=0, - segment_offset=16, - size_bytes=32, - ), ), + entries=canonical_without_manifest.entries + + ( + CanonicalIndexEntry( + name="__tensorcast_meta__.manifest_json", + dtype=torch.uint8, + shape=(32,), + stride=(1,), + storage_offset=0, + segment_offset=16, + size_bytes=32, + ), + ), total_size_bytes=48, avbs_hash="bafkavbs", ) - assert compute_serving_tensor_schema_hash( - canonical_with_manifest) == compute_serving_tensor_schema_hash( - canonical_without_manifest) - assert count_canonical_serving_tensors(canonical_with_manifest) == 1 + assert compute_runtime_artifact_tensor_schema_hash( + canonical_with_manifest + ) == compute_runtime_artifact_tensor_schema_hash(canonical_without_manifest) + assert count_canonical_runtime_tensors(canonical_with_manifest) == 1 -def test_compute_pure_transform_representation_contract_hash_accepts_tensor_mapping( -) -> (None): +def test_compute_pure_transform_representation_contract_hash_accepts_tensor_mapping() -> ( + None +): source_canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, - ), ) + ), + ) serving_tensors = { - "weights": - torch.empty((8, ), dtype=torch.float16), - SERVING_MANIFEST_TENSOR_NAME: - torch.tensor( + "weights": torch.empty((8,), dtype=torch.float16), + SERVING_MANIFEST_TENSOR_NAME: torch.tensor( list(b'{"schema_version":1}'), dtype=torch.uint8, ), @@ -373,25 +406,25 @@ def 
test_compute_pure_transform_representation_contract_hash_accepts_tensor_mapp CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, - ), ), + ), + ), ) assert hash_from_tensors == hash_from_index -def test_build_pure_transform_publication_bundle_from_registered_artifact( -) -> None: +def test_build_pure_transform_publication_bundle_from_registered_artifact() -> None: canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -399,8 +432,8 @@ def test_build_pure_transform_publication_bundle_from_registered_artifact( CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, @@ -419,7 +452,7 @@ def test_build_pure_transform_publication_bundle_from_registered_artifact( canonical_index=canonical_index, lease=None, ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkrepresentation", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", @@ -441,27 +474,37 @@ def test_build_pure_transform_publication_bundle_from_registered_artifact( assert bundle.manifest_tensor_name == "__tensorcast_meta__.manifest_json" assert bundle.serving_manifest_ref == build_serving_manifest_ref() assert bundle.serving_manifest.canonical_tensor_count == 1 - assert (bundle.serving_manifest.tensor_schema_hash == - compute_serving_tensor_schema_hash(canonical_index)) - assert (bundle.representation_publish_contract.serving_artifact_id == - "mi2:test:serving") - assert (bundle.closeout_contract.representation_publish_contract == - bundle.representation_publish_contract) + assert ( + bundle.serving_manifest.tensor_schema_hash + == 
compute_runtime_artifact_tensor_schema_hash(canonical_index) + ) + assert ( + bundle.representation_publish_contract.serving_artifact_id == "mi2:test:serving" + ) + assert ( + bundle.closeout_contract.representation_publish_contract + == bundle.representation_publish_contract + ) assert bundle.closeout_contract.kind == "representation_publish" - assert (ServingArtifactManifest.from_bytes( - bundle.serving_manifest_bytes) == bundle.serving_manifest) - assert (bundle.representation_publish_contract.serving_build_digest_version - == SERVING_BUILD_DIGEST_VERSION) + assert ( + RuntimeArtifactManifest.from_bytes(bundle.serving_manifest_bytes) + == bundle.serving_manifest + ) + assert ( + bundle.representation_publish_contract.serving_build_digest_version + == SERVING_BUILD_DIGEST_VERSION + ) -def test_compute_pure_transform_representation_contract_hash_normalizes_logical_topology( -) -> (None): +def test_compute_pure_transform_representation_contract_hash_normalizes_logical_topology() -> ( + None +): source_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -469,8 +512,8 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_ CanonicalIndexEntry( name="bias", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=16, @@ -480,8 +523,8 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_ CanonicalIndexEntry( name="bias", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=16, @@ -489,8 +532,8 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_ CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, 
segment_offset=0, size_bytes=16, @@ -500,44 +543,43 @@ def test_compute_pure_transform_representation_contract_hash_normalizes_logical_ hash_a = compute_pure_transform_representation_contract_hash( source_artifact=source_index, serving_artifact=serving_index, - logical_topology_json= - '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2},{"name":"pp","size":1}]}', + logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2},{"name":"pp","size":1}]}', ) hash_b = compute_pure_transform_representation_contract_hash( source_artifact=source_index, serving_artifact=serving_index, - logical_topology_json= - '{"dimensions":[{"name":"pp","size":1},{"name":"tp","size":2}],"version":"v1","family":"tp"}', + logical_topology_json='{"dimensions":[{"name":"pp","size":1},{"name":"tp","size":2}],"version":"v1","family":"tp"}', ) hash_c = compute_pure_transform_representation_contract_hash( source_artifact=source_index, serving_artifact=serving_index, - logical_topology_json= - '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":4},{"name":"pp","size":1}]}', + logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":4},{"name":"pp","size":1}]}', ) assert hash_a == hash_b assert hash_a != hash_c -def test_build_pure_transform_publication_bundle_auto_derives_representation_hash( -) -> (None): +def test_build_pure_transform_publication_bundle_auto_derives_representation_hash() -> ( + None +): source_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, - ), ) + ), + ) serving_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -545,8 +587,8 @@ def 
test_build_pure_transform_publication_bundle_auto_derives_representation_has CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, @@ -565,7 +607,7 @@ def test_build_pure_transform_publication_bundle_auto_derives_representation_has canonical_index=serving_index, lease=None, ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v4-auto", @@ -581,35 +623,34 @@ def test_build_pure_transform_publication_bundle_auto_derives_representation_has serving_artifact=registered_artifact, source_version_key="models/demo/source/auto", serving_version_key="models/demo/serving/auto", - logical_topology_json= - '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}', + logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}', ) expected_hash = compute_pure_transform_representation_contract_hash( source_artifact=source_index, serving_artifact=registered_artifact, - logical_topology_json= - '{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}', + logical_topology_json='{"family":"tp","version":"v1","dimensions":[{"name":"tp","size":2}]}', ) assert bundle.representation_publish_contract.representation_contract_hash == ( - expected_hash) + expected_hash + ) assert bundle.contract_family == "pp" assert bundle.serving_manifest.representation_contract_hash == expected_hash -def test_prepare_pure_transform_serving_registration_embeds_manifest_tensor( -) -> None: +def test_prepare_pure_transform_runtime_registration_embeds_manifest_tensor() -> None: source_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, - ), ) - intent = 
ServingBuildIntent( + ), + ) + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v4-prep", @@ -618,7 +659,7 @@ def test_prepare_pure_transform_serving_registration_embeds_manifest_tensor( source_artifact_ref="mi2:test:prep-source", ) - prepared = prepare_pure_transform_serving_registration( + prepared = prepare_pure_transform_runtime_registration( build_intent=intent, source_artifact=source_index, tensors={"weights": torch.zeros(8, dtype=torch.float16)}, @@ -628,21 +669,28 @@ def test_prepare_pure_transform_serving_registration_embeds_manifest_tensor( assert "__tensorcast_meta__.manifest_json" in prepared.tensors assert len(prepared.serving_manifest_bytes) % 8 == 0 assert prepared.canonical_index.total_size_bytes == sum( - int(entry.size_bytes) for entry in prepared.canonical_index.entries) - assert (ServingArtifactManifest.from_bytes( - prepared.serving_manifest_bytes) == prepared.serving_manifest) - assert (ServingArtifactManifest.from_bytes( - bytes(prepared.tensors["__tensorcast_meta__.manifest_json"].tolist())) - == prepared.serving_manifest) + int(entry.size_bytes) for entry in prepared.canonical_index.entries + ) + assert ( + RuntimeArtifactManifest.from_bytes(prepared.serving_manifest_bytes) + == prepared.serving_manifest + ) + assert ( + RuntimeArtifactManifest.from_bytes( + bytes(prepared.tensors["__tensorcast_meta__.manifest_json"].tolist()) + ) + == prepared.serving_manifest + ) assert prepared.representation_contract_hash == ( compute_pure_transform_representation_contract_hash( source_artifact=source_index, serving_artifact=prepared.canonical_index, - )) + ) + ) -def test_prepare_serving_registration_supports_binding_finalize() -> None: - intent = ServingBuildIntent( +def test_prepare_runtime_artifact_registration_supports_binding_finalize() -> None: + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", 
adapter_version="adapter-v4-binding", @@ -651,7 +699,7 @@ def test_prepare_serving_registration_supports_binding_finalize() -> None: source_artifact_ref="mi2:test:binding-source", ) - prepared = prepare_serving_registration( + prepared = prepare_runtime_artifact_registration( build_intent=intent, tensors={"weights": torch.ones(8, dtype=torch.float16)}, representation_contract_hash="bafkbindingrepr", @@ -666,9 +714,10 @@ def test_prepare_serving_registration_supports_binding_finalize() -> None: assert prepared.representation_contract_hash == "bafkbindingrepr" -def test_prepare_binding_finalize_serving_registration_requires_binding_finalize( -) -> (None): - intent = ServingBuildIntent( +def test_prepare_binding_finalize_runtime_registration_requires_binding_finalize() -> ( + None +): + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v4-wrong", @@ -678,16 +727,17 @@ def test_prepare_binding_finalize_serving_registration_requires_binding_finalize ) with pytest.raises(Exception, match="builder_mode=BINDING_FINALIZE"): - prepare_binding_finalize_serving_registration( + prepare_binding_finalize_runtime_registration( build_intent=intent, tensors={"weights": torch.ones(8, dtype=torch.float16)}, representation_contract_hash="bafkbindingrepr", ) -def test_prepare_binding_finalize_serving_registration_supports_binding_finalize( -) -> (None): - intent = ServingBuildIntent( +def test_prepare_binding_finalize_runtime_registration_supports_binding_finalize() -> ( + None +): + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", adapter_version="adapter-v4-binding-helper", @@ -696,7 +746,7 @@ def test_prepare_binding_finalize_serving_registration_supports_binding_finalize source_artifact_ref="mi2:test:binding-source", ) - prepared = prepare_binding_finalize_serving_registration( + prepared = prepare_binding_finalize_runtime_registration( 
build_intent=intent, tensors={"weights": torch.ones(8, dtype=torch.float16)}, representation_contract_hash="bafkbindingrepr", @@ -705,14 +755,16 @@ def test_prepare_binding_finalize_serving_registration_supports_binding_finalize assert prepared.serving_manifest.builder_mode == BuilderMode.BINDING_FINALIZE assert prepared.serving_manifest.topology_admission_digest == "bafktopology" - manifest_from_tensor = ServingArtifactManifest.from_bytes( - bytes(prepared.tensors[prepared.manifest_tensor_name].tolist())) + manifest_from_tensor = RuntimeArtifactManifest.from_bytes( + bytes(prepared.tensors[prepared.manifest_tensor_name].tolist()) + ) assert manifest_from_tensor.topology_admission_digest == "bafktopology" -def test_prepare_binding_finalize_serving_registration_rejects_stale_manifest_topology( -) -> None: - intent = ServingBuildIntent( +def test_prepare_binding_finalize_runtime_registration_rejects_stale_manifest_topology() -> ( + None +): + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", adapter_version="adapter-v4-binding-helper", @@ -720,14 +772,14 @@ def test_prepare_binding_finalize_serving_registration_rejects_stale_manifest_to build_pipeline_version="pipeline-v4-binding-helper", source_artifact_ref="mi2:test:binding-source", ) - prepared = prepare_binding_finalize_serving_registration( + prepared = prepare_binding_finalize_runtime_registration( build_intent=intent, tensors={"weights": torch.ones(8, dtype=torch.float16)}, representation_contract_hash="bafkbindingrepr", ) with pytest.raises(ArtifactError, match="topology_admission_digest"): - prepare_binding_finalize_serving_registration( + prepare_binding_finalize_runtime_registration( build_intent=intent, tensors=dict(prepared.tensors), representation_contract_hash="bafkbindingrepr", @@ -735,11 +787,13 @@ def test_prepare_binding_finalize_serving_registration_rejects_stale_manifest_to ) -def 
test_prepare_serving_registration_keeps_manifest_on_tensor_device( -) -> None: - device = (torch.device("cuda:0") - if torch.cuda.is_available() else torch.device("cpu")) - intent = ServingBuildIntent( +def test_prepare_runtime_artifact_registration_keeps_manifest_on_tensor_device() -> ( + None +): + device = ( + torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") + ) + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", adapter_version="adapter-v4-device", @@ -748,25 +802,25 @@ def test_prepare_serving_registration_keeps_manifest_on_tensor_device( source_artifact_ref="mi2:test:binding-source", ) - prepared = prepare_serving_registration( + prepared = prepare_runtime_artifact_registration( build_intent=intent, tensors={"weights": torch.ones(8, dtype=torch.float16, device=device)}, representation_contract_hash="bafkbindingrepr", ) assert prepared.tensors["weights"].device == device - assert prepared.tensors[ - "__tensorcast_meta__.manifest_json"].device == device + assert prepared.tensors["__tensorcast_meta__.manifest_json"].device == device -def test_build_serving_publication_bundle_from_registered_artifact_rejects_binding_finalize( -) -> (None): +def test_build_runtime_artifact_publication_bundle_from_registered_artifact_rejects_binding_finalize() -> ( + None +): canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -774,8 +828,8 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, @@ -794,7 +848,7 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi canonical_index=canonical_index, 
lease=None, ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -805,7 +859,7 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi ) with pytest.raises(ValueError, match="binding_value_ref subject"): - build_serving_publication_bundle_from_registered_artifact( + build_runtime_artifact_publication_bundle_from_registered_artifact( build_intent=intent, serving_artifact=registered_artifact, source_version_key="models/demo/source/v4", @@ -813,14 +867,13 @@ def test_build_serving_publication_bundle_from_registered_artifact_rejects_bindi ) -def test_build_binding_finalize_publication_bundle_uses_admission_facts( -) -> None: +def test_build_binding_finalize_publication_bundle_uses_admission_facts() -> None: canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -828,15 +881,15 @@ def test_build_binding_finalize_publication_bundle_uses_admission_facts( CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, ), total_size_bytes=80, ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -846,7 +899,7 @@ def test_build_binding_finalize_publication_bundle_uses_admission_facts( source_artifact_ref="mi2:test:binding-source", ) admission_facts = build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, topology_admission_digest="bafktopology", same_binding_fast_path_validated=True, ) @@ 
-869,14 +922,15 @@ def test_build_binding_finalize_publication_bundle_uses_admission_facts( assert bundle.admission_facts == admission_facts -def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_runtime_ready( -) -> (None): +def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_runtime_ready() -> ( + None +): canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -884,15 +938,15 @@ def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_r CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, ), total_size_bytes=80, ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -914,13 +968,13 @@ def test_build_binding_finalize_publication_bundle_rejects_serving_key_without_r canonical_index=canonical_index, serving_version_key="models/demo/serving/v4", admission_facts=build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, same_binding_fast_path_validated=True, ), ) -def test_published_model_version_builds_serving_runtime_policy() -> None: +def test_published_model_version_builds_runtime_artifact_policy() -> None: version = PublishedModelVersion( assembly_id="cgid:test-assembly", source_artifact_id="mi2:test:source", @@ -935,22 +989,20 @@ def test_published_model_version_builds_serving_runtime_policy() -> None: ), representation_contract_hash="bafkrepresentation", serving_build_digest="bafkbuilddigest", - serving_manifest_ref=build_serving_manifest_ref( - 
"__serving_manifest__.json"), + serving_manifest_ref=build_serving_manifest_ref("__serving_manifest__.json"), ) - policy = version.require_serving_runtime_policy() + policy = version.require_runtime_artifact_policy() - assert isinstance(policy, ServingRuntimePolicy) + assert isinstance(policy, RuntimeArtifactPolicy) assert policy.require_manifest is True assert policy.serving_manifest_ref == "tensor:__serving_manifest__.json" assert policy.expected_representation_contract_hash == "bafkrepresentation" assert policy.expected_serving_build_digest == "bafkbuilddigest" -def test_coerce_serving_runtime_policy_accepts_manifest_lineage_models( -) -> None: - manifest = ServingArtifactManifest( +def test_coerce_runtime_artifact_policy_accepts_manifest_lineage_models() -> None: + manifest = RuntimeArtifactManifest( framework_name="torch", adapter_version="adapter-v6", serving_abi_version="abi-v6", @@ -958,26 +1010,25 @@ def test_coerce_serving_runtime_policy_accepts_manifest_lineage_models( serving_build_digest="bafkbuilddigest", tensor_schema_hash="bafktensorschema", canonical_tensor_count=1, - serving_manifest_ref=build_serving_manifest_ref( - "__alt_manifest__.json"), + serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"), builder_mode=BuilderMode.PURE_TRANSFORM, build_pipeline_version="pipeline-v6", ) - policy = coerce_serving_runtime_policy(manifest) + policy = coerce_runtime_artifact_policy(manifest) - assert isinstance(policy, ServingRuntimePolicy) + assert isinstance(policy, RuntimeArtifactPolicy) assert policy.serving_manifest_ref == "tensor:__alt_manifest__.json" assert policy.expected_representation_contract_hash == "bafkrepresentation" assert policy.expected_serving_build_digest == "bafkbuilddigest" -def test_coerce_serving_runtime_policy_accepts_contract_and_version() -> None: +def test_coerce_runtime_artifact_policy_accepts_contract_and_version() -> None: contract = RepresentationPublishContract( - subject=ServingPublicationSubject( - 
serving_artifact_id="mi2:test:serving", ), - serving_manifest_ref=build_serving_manifest_ref( - "__alt_manifest__.json"), + subject=RuntimePublicationSubject( + serving_artifact_id="mi2:test:serving", + ), + serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"), representation_contract_hash="bafkrepresentation", serving_build_digest="bafkbuilddigest", ) @@ -995,14 +1046,13 @@ def test_coerce_serving_runtime_policy_accepts_contract_and_version() -> None: ), representation_contract_hash="bafkrepresentation", serving_build_digest="bafkbuilddigest", - serving_manifest_ref=build_serving_manifest_ref( - "__alt_manifest__.json"), + serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"), ) - contract_policy = coerce_serving_runtime_policy(contract) - version_policy = coerce_serving_runtime_policy(version) + contract_policy = coerce_runtime_artifact_policy(contract) + version_policy = coerce_runtime_artifact_policy(version) - assert contract_policy == ServingRuntimePolicy( + assert contract_policy == RuntimeArtifactPolicy( require_manifest=True, serving_manifest_ref="tensor:__alt_manifest__.json", expected_representation_contract_hash="bafkrepresentation", @@ -1011,9 +1061,10 @@ def test_coerce_serving_runtime_policy_accepts_contract_and_version() -> None: assert version_policy == contract_policy -def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publish_spec( -) -> (None): - manifest = ServingArtifactManifest( +def test_coerce_runtime_artifact_policy_accepts_runtime_ready_representation_publish_spec() -> ( + None +): + manifest = RuntimeArtifactManifest( framework_name="torch", adapter_version="adapter-v6-runtime", serving_abi_version="abi-v6-runtime", @@ -1025,8 +1076,9 @@ def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publ build_pipeline_version="pipeline-v6-runtime", ) contract = RepresentationPublishContract( - subject=ServingPublicationSubject( - 
serving_artifact_id="mi2:test:serving", ), + subject=RuntimePublicationSubject( + serving_artifact_id="mi2:test:serving", + ), serving_manifest_ref=build_serving_manifest_ref(), representation_contract_hash="bafkrepresentation", serving_build_digest="bafkbuilddigest", @@ -1042,15 +1094,15 @@ def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publ kind="representation_publish", representation_publish_contract=contract, ), - admission_facts=ServingAdmissionFacts( + admission_facts=RuntimeAdmissionFacts( finalize_class=FinalizeClass.RUNTIME_ONLY, - support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, ), ) - policy = coerce_serving_runtime_policy(spec) + policy = coerce_runtime_artifact_policy(spec) - assert policy == ServingRuntimePolicy( + assert policy == RuntimeArtifactPolicy( require_manifest=True, serving_manifest_ref=build_serving_manifest_ref(), expected_representation_contract_hash="bafkrepresentation", @@ -1058,9 +1110,10 @@ def test_coerce_serving_runtime_policy_accepts_runtime_ready_representation_publ ) -def test_coerce_serving_runtime_policy_rejects_builder_only_representation_publish_spec( -) -> (None): - manifest = ServingArtifactManifest( +def test_coerce_runtime_artifact_policy_rejects_builder_only_representation_publish_spec() -> ( + None +): + manifest = RuntimeArtifactManifest( framework_name="torch", adapter_version="adapter-v6-runtime-blocked", serving_abi_version="abi-v6-runtime-blocked", @@ -1072,8 +1125,9 @@ def test_coerce_serving_runtime_policy_rejects_builder_only_representation_publi build_pipeline_version="pipeline-v6-runtime-blocked", ) contract = RepresentationPublishContract( - subject=ServingPublicationSubject( - serving_artifact_id="mi2:test:serving", ), + subject=RuntimePublicationSubject( + serving_artifact_id="mi2:test:serving", + ), serving_manifest_ref=build_serving_manifest_ref(), representation_contract_hash="bafkrepresentation", 
serving_build_digest="bafkbuilddigest", @@ -1088,18 +1142,18 @@ def test_coerce_serving_runtime_policy_rejects_builder_only_representation_publi kind="representation_publish", representation_publish_contract=contract, ), - admission_facts=ServingAdmissionFacts( + admission_facts=RuntimeAdmissionFacts( finalize_class=FinalizeClass.RUNTIME_ONLY, - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, ), ) with pytest.raises(ValueError, match="RUNTIME_BIND_SWAP_READY"): - coerce_serving_runtime_policy(spec) + coerce_runtime_artifact_policy(spec) def test_build_pure_transform_transform_spec_wraps_transform_args() -> None: - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkrepresentation", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", @@ -1126,9 +1180,8 @@ def test_build_pure_transform_transform_spec_wraps_transform_args() -> None: assert spec.publication_spec.serving_version_key == "models/demo/serving/v6" -def test_build_pure_transform_transform_spec_can_omit_representation_hash( -) -> None: - intent = ServingBuildIntent( +def test_build_pure_transform_transform_spec_can_omit_representation_hash() -> None: + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", adapter_version="adapter-v6-auto", @@ -1146,12 +1199,12 @@ def test_build_pure_transform_transform_spec_can_omit_representation_hash( def test_build_pure_transform_publication_spec_wraps_typed_inputs() -> None: - admission_facts = ServingAdmissionFacts( + admission_facts = RuntimeAdmissionFacts( finalize_class=FinalizeClass.RUNTIME_ONLY, - support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, topology_admission_digest="bafktopology", ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( builder_mode=BuilderMode.PURE_TRANSFORM, 
framework_name="torch", adapter_version="adapter-v7", @@ -1164,9 +1217,8 @@ def test_build_pure_transform_publication_spec_wraps_typed_inputs() -> None: contract_family="canonical_full", source_version_key="models/demo/source/v7", serving_version_key="models/demo/serving/v7", - serving_manifest_ref=build_serving_manifest_ref( - "__alt_manifest__.json"), - structural_view_ids=("view-a", ), + serving_manifest_ref=build_serving_manifest_ref("__alt_manifest__.json"), + structural_view_ids=("view-a",), admission_facts=admission_facts, ) @@ -1175,7 +1227,7 @@ def test_build_pure_transform_publication_spec_wraps_typed_inputs() -> None: assert publication_spec.source_version_key == "models/demo/source/v7" assert publication_spec.serving_version_key == "models/demo/serving/v7" assert publication_spec.serving_manifest_ref == "tensor:__alt_manifest__.json" - assert publication_spec.structural_view_ids == ("view-a", ) + assert publication_spec.structural_view_ids == ("view-a",) assert publication_spec.admission_facts == admission_facts @@ -1184,9 +1236,9 @@ def test_pure_transform_publication_no_longer_exposes_string_arg_fallback() -> N removed_markers: list[str] = [] for path in ( - Path("tensorcast/api/store/serving_builder.py"), + Path("tensorcast/api/store/publication_builder.py"), Path("tensorcast/engine_adapter/adapter.py"), - Path("tensorcast/serving/builder/publication.py"), + Path("tensorcast/artifact_runtime/recipe/publication.py"), ): text = path.read_text(encoding="utf-8") if "tc_serving_" in text or "build_pure_transform_serving_args" in text: @@ -1195,14 +1247,15 @@ def test_pure_transform_publication_no_longer_exposes_string_arg_fallback() -> N assert removed_markers == [] -def test_representation_publish_spec_round_trips_admission_facts_and_digest_version( -) -> (None): +def test_representation_publish_spec_round_trips_admission_facts_and_digest_version() -> ( + None +): canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", 
dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -1210,8 +1263,8 @@ def test_representation_publish_spec_round_trips_admission_facts_and_digest_vers CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, @@ -1230,13 +1283,13 @@ def test_representation_publish_spec_round_trips_admission_facts_and_digest_vers canonical_index=canonical_index, lease=None, ) - admission_facts = ServingAdmissionFacts( + admission_facts = RuntimeAdmissionFacts( finalize_class=FinalizeClass.RUNTIME_ONLY, - support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY, + support_level=RuntimeSupportLevel.RUNTIME_BIND_SWAP_READY, topology_admission_digest="bafktopology", ) bundle = build_pure_transform_publication_bundle_from_registered_artifact( - build_intent=ServingBuildIntent( + build_intent=RuntimeArtifactBuildIntent( representation_contract_hash="bafkrepresentation", builder_mode=BuilderMode.PURE_TRANSFORM, framework_name="torch", @@ -1253,18 +1306,21 @@ def test_representation_publish_spec_round_trips_admission_facts_and_digest_vers restored = RepresentationPublishSpec.from_proto(bundle.to_proto()) assert restored.admission_facts == admission_facts - assert (restored.representation_publish_contract. 
- serving_build_digest_version == SERVING_BUILD_DIGEST_VERSION) + assert ( + restored.representation_publish_contract.serving_build_digest_version + == SERVING_BUILD_DIGEST_VERSION + ) -def test_topology_admission_digest_does_not_change_representation_or_build_identity( -) -> (None): +def test_topology_admission_digest_does_not_change_representation_or_build_identity() -> ( + None +): canonical_index = _canonical_index( CanonicalIndexEntry( name="weights", dtype=torch.float16, - shape=(8, ), - stride=(1, ), + shape=(8,), + stride=(1,), storage_offset=0, segment_offset=0, size_bytes=16, @@ -1272,15 +1328,15 @@ def test_topology_admission_digest_does_not_change_representation_or_build_ident CanonicalIndexEntry( name="__tensorcast_meta__.manifest_json", dtype=torch.uint8, - shape=(64, ), - stride=(1, ), + shape=(64,), + stride=(1,), storage_offset=0, segment_offset=16, size_bytes=64, ), total_size_bytes=80, ) - intent = ServingBuildIntent( + intent = RuntimeArtifactBuildIntent( representation_contract_hash="bafkbindingrepr", builder_mode=BuilderMode.BINDING_FINALIZE, framework_name="torch", @@ -1300,7 +1356,7 @@ def test_topology_admission_digest_does_not_change_representation_or_build_ident publication_subject=binding_value, canonical_index=canonical_index, admission_facts=build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, topology_admission_digest="bafktopology-a", same_binding_fast_path_validated=True, ), @@ -1310,17 +1366,20 @@ def test_topology_admission_digest_does_not_change_representation_or_build_ident publication_subject=binding_value, canonical_index=canonical_index, admission_facts=build_binding_finalize_admission_facts( - support_level=ServingSupportLevel.BUILDER_PUBLICATION_READY, + support_level=RuntimeSupportLevel.BUILDER_PUBLICATION_READY, topology_admission_digest="bafktopology-b", same_binding_fast_path_validated=True, ), ) - assert 
(bundle_a.representation_publish_contract. - representation_contract_hash == bundle_b. - representation_publish_contract.representation_contract_hash) - assert (bundle_a.representation_publish_contract.serving_build_digest == - bundle_b.representation_publish_contract.serving_build_digest) + assert ( + bundle_a.representation_publish_contract.representation_contract_hash + == bundle_b.representation_publish_contract.representation_contract_hash + ) + assert ( + bundle_a.representation_publish_contract.serving_build_digest + == bundle_b.representation_publish_contract.serving_build_digest + ) assert bundle_a.serving_manifest.topology_admission_digest == "bafktopology-a" assert bundle_b.serving_manifest.topology_admission_digest == "bafktopology-b" assert bundle_a.admission_facts != bundle_b.admission_facts diff --git a/tests/python/test_serving_fake_framework_boundary.py b/tests/python/test_serving_fake_framework_boundary.py deleted file mode 100644 index 5dc437ce..00000000 --- a/tests/python/test_serving_fake_framework_boundary.py +++ /dev/null @@ -1,670 +0,0 @@ -# Copyright (c) 2026, TensorCast Team. 
- -from contextlib import contextmanager -from types import SimpleNamespace - -import torch - -import tensorcast.serving._runtime_impl.lifecycle as integration_mod -from tensorcast.serving._runtime_impl.lifecycle import ( - FrameworkIdentity, - IntegrationHost, - MaterializationExecutionFacts, - PlacementAdmissionFacts, - PlacementIdentityFacts, - PlacementMemberFacts, - ServingIntegration, - SourceSelector, -) -from tensorcast.serving.admin import AdminLocalSourceBootstrap -from tensorcast.serving.builder.compiler import ( - CompiledServingRecipe, - TensorcastSemanticValidationSpec, - TensorcastServingFacts, - TensorSchemaEntry, -) -from tensorcast.serving.builder.trace_ir import TracePlan -from tensorcast.serving.recipe_build import ( - RecipeBuildSession, - ServingBindingPlan, -) -from tensorcast.serving.retained_binding import ( - ParsedRetainedServingBindingAuthority, - RetainedServingBindingExpectedDigests, -) -from tensorcast.serving.runtime import ( - BootstrapPolicy, - ExistingServingArtifact, - RequestContext, - RetainedBindingAcquire, - ServingArtifactLocator, -) -from tensorcast.types import ( - BindingReservationCapability, - BindingValueRef, - FinalizeClass, - ServingArtifactManifest, - ServingBindingMemberRef, - ServingSupportLevel, -) - - -class _FakeArtifactView: - def __init__(self, parent, names=None): - self.parent = parent - self.names = tuple(names or ()) - - def bind(self, **kwargs): - binding = _FakeBinding() - binding.names = self.names - binding.kwargs = kwargs - return binding - - -class _FakeArtifact: - def subset(self, names): - return _FakeArtifactView(self, names) - - -class _FakeBinding: - def __init__(self): - self.tensors = {"w": torch.ones((1,), dtype=torch.float16)} - self.binding_layout_id = "layout-1" - self.realized = None - self.swapped = None - self.closed = False - - def realize_from(self, source_view, *, realization_plan, options): - self.realized = (source_view, realization_plan, options) - return "epoch-1" - - def 
swap(self, artifact, **kwargs): - self.swapped = (artifact, kwargs) - self.tensors = {"w": torch.full((1,), 2.0, dtype=torch.float16)} - return self - - def freeze_current(self, *, update_epoch, source_artifact_ref): - return SimpleNamespace( - binding_id="binding-1", - binding_layout_id=self.binding_layout_id, - binding_value_id="value-1", - seal_generation=1, - update_epoch=update_epoch, - source_artifact_ref=source_artifact_ref, - local_serving_ref="binding-local:fake", - ) - - def close(self): - self.closed = True - - -class _FakeRestoredRetainedBinding: - def __init__(self): - self.tensors = {"w": torch.ones((1,), dtype=torch.float16)} - self.binding_layout_id = "layout-1" - self.binding_value_ref = SimpleNamespace( - binding_id="binding-1", - binding_layout_id="layout-1", - binding_value_id="value-1", - seal_generation=1, - ) - self.reservation_bytes = 4096 - self.closed = False - self.transferred = False - - def transfer_to_runtime(self): - self.transferred = True - return SimpleNamespace(close=lambda: None) - - def close(self): - self.closed = True - - -def _retained_authority() -> ParsedRetainedServingBindingAuthority: - member = ServingBindingMemberRef( - member_id="member-0", - member_index=0, - member_count=1, - group_id="group-1", - ) - binding_ref = BindingValueRef( - binding_id="binding-1", - binding_layout_id="layout-1", - binding_value_id="value-1", - seal_generation=1, - ) - capability = BindingReservationCapability( - capability_id="capability-1", - binding_value_ref=binding_ref, - daemon_id="daemon-1", - daemon_session_id="session-1", - device_uuid="gpu-0", - member=member, - reservation_bytes=4096, - scope_digest="scope-1", - ) - return ParsedRetainedServingBindingAuthority( - group_id="group-1", - local_serving_ref="binding-local:fake", - binding_value_ref=binding_ref, - reservation_capability=capability, - daemon_id="daemon-1", - daemon_session_id="session-1", - device_uuid="gpu-0", - member=member, - reservation_bytes=4096, - 
expected=RetainedServingBindingExpectedDigests( - target_layout_hash="layout-hash", - tensor_schema_hash="fake-schema", - serving_build_digest="build-digest", - resolved_spec_digest="spec-digest", - ), - readiness="serving_local_ready", - verification_state="local_only", - ) - - -class _FakeSource: - def subset(self, names): - return ("subset", tuple(names)) - - -class _FakeRuntimeModel: - def __init__(self): - self.tensors = {"w": torch.empty((1,), dtype=torch.float16, device="meta")} - - -class _FakeFrameworkHost: - def identity(self, model_config): - del model_config - return FrameworkIdentity( - framework_name="fakefw", - framework_version="fakefw-v1", - adapter_version="adapter-v1", - serving_abi_version="abi-v1", - ) - - def prepare_model_construction(self, framework_config, model_config): - del framework_config, model_config - - def build_meta_model(self, framework_config, model_config): - del framework_config, model_config - return _FakeRuntimeModel() - - def build_runtime_model(self, framework_config, model_config, target_device): - del framework_config, model_config, target_device - return _FakeRuntimeModel() - - def assert_model_ready_for_runtime_binding(self, model, *, context): - del context - assert "w" in model.tensors - - def semantic_probes(self, model, model_config): - del model, model_config - return {} - - -class _FakePlacementHost: - def identity_facts(self, framework_config): - del framework_config - return PlacementIdentityFacts( - tensor_parallel_rank=0, - tensor_parallel_size=1, - pipeline_parallel_rank=0, - pipeline_parallel_size=1, - data_parallel_rank=0, - data_parallel_size=1, - ) - - def admission_facts(self, framework_config): - del framework_config - return PlacementAdmissionFacts() - - def member_facts(self, framework_config): - del framework_config - return PlacementMemberFacts( - runtime_rank=0, - runtime_world_size=1, - member_id="member-0", - member_index=0, - member_count=1, - group_id_hint="group-1", - ) - - def 
execution_facts(self, framework_config): - del framework_config - return MaterializationExecutionFacts( - collective_rank=0, - collective_world_size=1, - tensor_parallel_ranks=(0,), - ) - - -class _FakeTensorSurface: - def runtime_only_tensor_names(self, model): - del model - return () - - def align_runtime_tensor_names(self, model, expected_names): - assert set(expected_names) == set(model.tensors) - return 0 - - def collect_runtime_tensors(self, model, *, remove_duplicate=False): - del remove_duplicate - return dict(model.tensors) - - def collect_runtime_tensor_view(self, tensors): - del tensors - return () - - def compute_runtime_tensor_schema_hash(self, tensors, *, remove_duplicate=False): - del tensors, remove_duplicate - return "fake-schema" - - def attach_bound_tensors(self, model, tensors, *, replace_meta_params): - del replace_meta_params - model.tensors.update(tensors) - return model - - def allocate_runtime_only_tensors(self, model, target_device): - del model, target_device - return {} - - def snapshot_tensor_invariants(self, tensors): - return tuple(sorted(tensors)) - - def validate_tensor_invariants(self, before, after): - assert before == tuple(sorted(after)) - - -def _realization_plan_proto(): - from tensorcast.proto.daemon.v2 import store_daemon_pb2 - - plan = store_daemon_pb2.BindingRealizationPlan() - entry = plan.entries.add(dst_name="w") - entry.op_kind = store_daemon_pb2.BINDING_REALIZATION_OP_KIND_COPY - entry.source_name = "w" - return plan.SerializeToString(deterministic=True) - - -def _recipe(): - return CompiledServingRecipe( - compile_key="compile", - source_artifact_ref="mi2:source", - source_metadata_fingerprint="meta", - serving_facts=TensorcastServingFacts( - framework_name="fakefw", - framework_version="fakefw-v1", - adapter_version="adapter-v1", - serving_abi_version="abi-v1", - support_level=ServingSupportLevel.RUNTIME_BIND_SWAP_READY, - runtime_only_tensor_names=(), - process_after_load_class=FinalizeClass.RUNTIME_ONLY, - 
post_bind_finalize_class=FinalizeClass.RUNTIME_ONLY, - ), - trace_plan=TracePlan( - copy_plan=[], - expected_src_names={"w"}, - expected_dst_names={"w"}, - tensorcast_slices={}, - src_hull={}, - ), - tensor_schema=( - TensorSchemaEntry( - name="w", - dtype="torch.float16", - shape=(1,), - stride=(1,), - ), - ), - source_hull=(), - realization_plan=(), - realization_fallback_plan=(), - topology_ref=None, - member_ref=None, - semantic_validation_spec=TensorcastSemanticValidationSpec.empty(), - realization_plan_proto=_realization_plan_proto(), - realization_plan_count=1, - ) - - -def test_fake_second_framework_core_generated_ids_are_framework_neutral(): - group_id = integration_mod.build_collective_group_id( - artifact_ref="mi2:fake:serving", - operation_scope="fakefw.realize", - tp_ranks=(0, 1), - contract_identity="repr", - ) - assert group_id.startswith("tensorcast-") - assert "vllm" not in group_id - - _contract_hash, manifest_bytes = ( - integration_mod.prepare_same_binding_manifest_carrier( - _recipe(), - manifest_tensor_name="__tensorcast_meta__.manifest", - representation_contract_hash="repr", - topology_admission_digest="topology-digest", - ) - ) - manifest = ServingArtifactManifest.from_bytes(manifest_bytes) - lower_manifest = manifest_bytes.lower() - assert integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION == ( - "tensorcast-bootstrap-v1" - ) - assert manifest.topology_admission_digest == "topology-digest" - assert ( - integration_mod.LOCAL_READY_BOOTSTRAP_BUILD_PIPELINE_VERSION.encode() - in manifest_bytes - ) - assert b"vllm" not in lower_manifest - - -def test_fake_second_framework_uses_host_intent_lifecycle(monkeypatch): - identity = ServingBindingPlan( - model_hash="hash", - model_id="fake-model", - model_revision=None, - dtype="torch.float16", - runtime_version="fake-runtime-v1", - framework_name="fakefw", - framework_version="fakefw-v1", - adapter_version="adapter-v1", - serving_abi_version="abi-v1", - trace_cache_schema_version=1, - 
tp_rank=0, - tp_world_size=1, - ) - session = RecipeBuildSession(identity) - assert session.recipe_cache_key(metadata_fingerprint="meta") - - monkeypatch.setattr( - integration_mod, - "read_source_bound_contract_state", - lambda: SimpleNamespace( - source_bound_contract_ready=True, - source_bound_contract_version=4, - source_bound_capability_names=("collective",), - ), - ) - monkeypatch.setattr( - ServingIntegration, - "build_materialization_options", - lambda self, **kwargs: ("realize-options", kwargs), - ) - direct_resolve_calls = [] - - class _FakeResolver: - def resolve(self, artifact_ref): - direct_resolve_calls.append(("resolve", artifact_ref)) - return SimpleNamespace( - artifact=_FakeArtifact(), - artifact_ref=artifact_ref, - tensor_names=("w",), - manifest=SimpleNamespace( - representation_contract_hash="repr-direct", - source_artifact_ref="mi2:source", - serving_build_digest="build-direct", - ), - ) - - def cross_check(self, resolved_artifact, **kwargs): - direct_resolve_calls.append(("cross_check", kwargs)) - return resolved_artifact - - host = IntegrationHost( - framework=_FakeFrameworkHost(), - placement=_FakePlacementHost(), - tensor_surface=_FakeTensorSurface(), - ) - direct_attachment = ServingIntegration( - resolver=_FakeResolver(), - host=host, - ).start( - ExistingServingArtifact(ServingArtifactLocator.artifact_ref("mi2:serving")), - RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ), - ) - direct_payload = direct_attachment.view.endpoint.to_weight_version_payload() - assert direct_attachment.state.runtime_view.readiness == "serving" - assert direct_payload["serving_artifact_ref"] == "mi2:serving" - assert direct_payload["source_artifact_ref"] == "mi2:source" - assert direct_resolve_calls[1][1]["expected_tensor_schema_hash"] == "fake-schema" - reload_attachment = ServingIntegration( - resolver=_FakeResolver(), - host=host, - ).reload( - 
direct_attachment.state, - ExistingServingArtifact( - ServingArtifactLocator.artifact_ref("mi2:serving-next") - ), - RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - ), - model=direct_attachment.model, - ) - reload_payload = reload_attachment.view.endpoint.to_weight_version_payload() - reload_response = reload_attachment.view.endpoint.to_reload_response_payload() - assert reload_payload["serving_artifact_ref"] == "mi2:serving-next" - assert reload_response == { - "schema_version": 1, - "serving_artifact_ref": "mi2:serving-next", - "representation_contract_hash": "repr-direct", - "serving_build_digest": "build-direct", - "readiness": "serving", - } - assert direct_attachment.state.binding.swapped[1]["options"] == "realize-options" - described = ServingIntegration(host=host).describe(reload_attachment.state) - assert ( - described.endpoint.to_weight_version_payload()["serving_artifact_ref"] - == "mi2:serving-next" - ) - - host_binding = _FakeBinding() - host_model = _FakeRuntimeModel() - attachment = ServingIntegration(host=host).start( - AdminLocalSourceBootstrap( - source_selector=SourceSelector.local_path("/tmp/fake-model"), - bootstrap_policy=BootstrapPolicy(), - recipe=_recipe(), - source_subject=_FakeSource(), - source_artifact_ref="mi2:source", - model=host_model, - binding_factory=lambda *args, **kwargs: host_binding, - ), - RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ), - ) - assert attachment.model is host_model - assert attachment.state.runtime_view.readiness == "serving_local_ready" - payload = attachment.view.endpoint.to_weight_version_payload() - assert payload["source_artifact_ref"] == "mi2:source" - assert payload["family"] == "generic" - assert payload["tp_rank"] == 0 - assert attachment.prepared is not None - assert host_binding.realized is not None - assert host_binding.realized[2] == 
"realize-options" - - retained_calls = [] - restored = _FakeRestoredRetainedBinding() - - @contextmanager - def fake_restore_retained(**kwargs): - retained_calls.append(kwargs) - yield restored - - monkeypatch.setattr( - integration_mod, "restore_retained_binding", fake_restore_retained - ) - retained_attachment = ServingIntegration(host=host).start( - RetainedBindingAcquire(authority=_retained_authority()), - RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ), - ) - retained_payload = retained_attachment.view.endpoint.to_weight_version_payload() - assert retained_attachment.state.runtime_view.readiness == "serving_local_ready" - assert retained_payload["local_serving_ref"] == "binding-local:fake" - assert retained_payload["binding_value_ref"]["binding_value_id"] == "value-1" - assert retained_calls[0]["expected_member"].member_index == 0 - assert restored.transferred - - -def test_fake_second_framework_uses_public_runtime_session(monkeypatch): - import tensorcast.serving.hosts as tc_hosts - import tensorcast.serving.runtime as tc_runtime - from tensorcast.serving.testing import assert_framework_isolation - - monkeypatch.setattr( - tc_runtime.RuntimeSettings, "ensure_initialized", lambda self: None - ) - monkeypatch.setattr( - integration_mod, - "read_source_bound_contract_state", - lambda: SimpleNamespace( - source_bound_contract_ready=True, - source_bound_contract_version=4, - source_bound_capability_names=("collective",), - ), - ) - monkeypatch.setattr( - integration_mod.ServingIntegration, - "build_materialization_options", - lambda self, **kwargs: ("runtime-options", kwargs), - ) - - class _Resolver: - def resolve(self, artifact_ref): - return SimpleNamespace( - artifact=_FakeArtifact(), - artifact_ref=artifact_ref, - tensor_names=("w",), - manifest=SimpleNamespace( - representation_contract_hash=f"repr:{artifact_ref}", - source_artifact_ref="mi2:source", - 
serving_build_digest=f"build:{artifact_ref}", - ), - ) - - def cross_check(self, resolved_artifact, **kwargs): - return resolved_artifact - - host = tc_hosts.IntegrationHost( - framework=_FakeFrameworkHost(), - placement=_FakePlacementHost(), - tensor_surface=_FakeTensorSurface(), - ) - session = tc_runtime.ServingRuntimeSession.from_config( - { - "bootstrap": { - "mode": "disabled", - }, - "serving": { - "artifact_locator": { - "kind": "artifact_ref", - "value": "mi2:serving", - }, - }, - }, - host=host, - resolver=_Resolver(), - ) - - attachment = session.start( - tc_runtime.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - target_device=torch.device("cuda:0"), - ) - ) - reloaded = session.reload( - current_attachment=attachment, - artifact_locator=tc_runtime.ServingArtifactLocator.artifact_ref( - "mi2:serving-next" - ), - policy=tc_runtime.ServingPolicy(), - context=tc_runtime.RequestContext( - framework_config=SimpleNamespace(), - model_config=SimpleNamespace(model="fake-model"), - ), - model=attachment.model, - ) - - assert ( - attachment.view.endpoint.to_weight_version_payload()["serving_artifact_ref"] - == "mi2:serving" - ) - assert ( - reloaded.view.endpoint.to_reload_response_payload()["serving_artifact_ref"] - == "mi2:serving-next" - ) - assert_framework_isolation( - ("tensorcast.serving.runtime", "tensorcast.serving.hosts") - ) - - -def test_fake_second_framework_runtime_conformance_kit(): - import tensorcast.serving.hosts as tc_hosts - import tensorcast.serving.runtime as tc_runtime - from tensorcast.serving.testing import ( - assert_level1_runtime_conformance, - assert_level2_local_bootstrap_conformance, - assert_level3_retained_binding_conformance, - ) - - result = assert_level1_runtime_conformance(tc_runtime, tc_hosts) - - assert result.checks["direct_start"] - assert result.checks["reload"] - assert result.checks["describe"] - assert result.checks["source_capability_not_required"] - assert 
result.checks["source_catalog_not_required"] - assert result.checks["rejects_local_reload_artifact_locator"] - assert result.checks["rejects_untyped_reload_artifact_locator"] - assert result.checks["rejects_untyped_reload_policy"] - - local = assert_level2_local_bootstrap_conformance(tc_runtime, tc_hosts) - assert local.checks["missing_source_catalog_fails_closed"] - assert local.checks["source_catalog_request_core_owned"] - assert local.checks["recipe_build_receives_core_catalog"] - assert local.checks["missing_trace_capability_is_explicit"] - assert local.checks["local_path_is_not_reload_artifact_locator"] - - retained = assert_level3_retained_binding_conformance(tc_runtime, tc_hosts) - assert retained.checks["retained_acquire_public_start"] - assert retained.checks["retained_acquire_uses_host_member"] - assert retained.checks["retained_acquire_transfers_ownership"] - assert retained.checks["missing_authority_fails_closed"] - assert retained.checks["authority_mismatch_fails_closed"] - assert retained.checks["failure_path_used_retained_restore"] - assert retained.checks["failure_cleanup_closes_untransferred_handle"] - assert retained.checks["rejects_arbitrary_retained_authority"] - - -def test_conformance_failure_summary_includes_onboarding_hint(): - from tensorcast.serving.testing import ConformanceResult - - result = ConformanceResult( - checks={"direct_start": False}, - messages={"direct_start": "provide a tensor surface"}, - level="level1-runtime", - ) - - try: - result.assert_passed() - except AssertionError as exc: - message = str(exc) - else: - raise AssertionError("expected conformance failure") - - assert "level1-runtime" in message - assert "direct_start" in message - assert "provide a tensor surface" in message